Coverage for enderchest/sync/sftp.py: 89%
180 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-04 01:41 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-04 01:41 +0000
1"""paramiko-based sftp sync implementation"""
3import os
4import posixpath
5import stat
6from contextlib import contextmanager
7from pathlib import Path
8from typing import Any, Collection, Generator
9from urllib.parse import ParseResult, unquote
10from urllib.request import url2pathname
12import paramiko
14from ..prompt import prompt
15from . import (
16 SYNC_LOGGER,
17 Op,
18 diff,
19 file,
20 filter_contents,
21 generate_sync_report,
22 is_identical,
23)
@contextmanager
def connect(
    uri: ParseResult, timeout: float | None = None
) -> Generator[paramiko.sftp_client.SFTPClient, None, None]:
    """Yield an SFTPClient connected to the server specified by the given URI

    Parameters
    ----------
    uri : ParseResult
        The URI of the EnderChest to connect to. A missing hostname falls back
        to "localhost" and a missing port falls back to 22.
    timeout : float, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.

    Yields
    ------
    SFTPClient
        A Paramiko SFTP client connected to the specified server

    Raises
    ------
    ValueError
        If key-based login is rejected and the password the user is then
        prompted for is rejected as well

    Notes
    -----
    - A passwordless (key / agent based) login is always attempted first;
      the user is only prompted for a password if that attempt fails
      authentication.
    - Only authentication failures are handled here. Any other exception
      raised while connecting (unreachable host, timeout, ...) propagates
      to the caller unchanged.
    - The SFTP session and the underlying SSH connection are closed when the
      context exits, whether normally or because of an exception, and the SSH
      connection is also closed if setting up the session fails part-way.
    """
    ssh_client = paramiko.client.SSHClient()
    # NOTE(security): unknown host keys are accepted automatically
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    hostname = uri.hostname or "localhost"
    connect_kwargs: dict[str, Any] = {
        "port": uri.port or 22,
        "username": uri.username,
    }
    if timeout is not None:
        connect_kwargs["timeout"] = timeout

    # everything after the client is created lives inside this try so that the
    # SSH client is closed even when connecting or authenticating fails
    try:
        try:
            # note: passing in password is explicitly unsupported
            ssh_client.connect(hostname, **connect_kwargs)
        except paramiko.AuthenticationException:
            target = ((uri.username + "@") if uri.username else "") + (
                uri.hostname or "localhost"
            )

            SYNC_LOGGER.warning(
                f"This machine is not set up for passwordless login to {target}"
                "\nFor instructions on setting up public key-based authentication,"
                " which is both"
                "\nmore convenient and more secure, see:"
                "\nhttps://openbagtwo.github.io/EnderChest"
                "/dev/suggestions/#passwordless-ssh-authentication"
            )
            password = prompt(
                f"Please enter the password for {target}", is_password=True
            )
            try:
                ssh_client.connect(hostname, password=password, **connect_kwargs)
            except paramiko.AuthenticationException as bad_login:
                raise ValueError(
                    "Authentication failed."
                    " Did you supply the correct username and password?"
                ) from bad_login

        sftp_client = ssh_client.open_sftp()
        try:
            yield sftp_client
        finally:
            # close the SFTP session even if the caller's block raised
            sftp_client.close()
    finally:
        ssh_client.close()
def download_file(
    client: paramiko.sftp_client.SFTPClient,
    remote_loc: str,
    local_path: Path,
    remote_stat: paramiko.SFTPAttributes,
) -> None:
    """Fetch a single remote object over SFTP and store it at the given local
    path.

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    remote_loc : str
        The POSIX path of the file to download
    local_path : Path
        The path to locally save the file
    remote_stat : stat-like
        The `os.stat_result`-like properties of the remote object

    Notes
    -----
    - Remote symlinks are not followed: a local symlink pointing at the same
      target string is created instead.
    - Regular files are fetched with `client.get()`, after which the remote's
      access and modification times are applied locally (only when both are
      set on `remote_stat`).
    - No validation is performed: the caller is responsible for making sure
      both paths are sensible and that the local parent folder exists.
    """
    remote_mode = remote_stat.st_mode or 0

    if stat.S_ISLNK(remote_mode):
        # recreate the link locally rather than downloading what it points to
        link_target = client.readlink(remote_loc) or ""
        local_path.symlink_to(Path(link_target))
        return

    client.get(remote_loc, local_path)

    # mirror the remote timestamps, but only if the server reported both
    if remote_stat.st_atime and remote_stat.st_mtime:
        timestamps = (remote_stat.st_atime, remote_stat.st_mtime)
        os.utime(local_path, times=timestamps)
def upload_file(
    client: paramiko.sftp_client.SFTPClient,
    local_path: Path,
    remote_loc: str,
) -> None:
    """Upload a local file to a remote SFTP server

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    local_path : Path
        The path of the file to upload
    remote_loc : str
        The POSIX path for the remote location to save the file

    Notes
    -----
    - Local symlinks are not followed: a remote symlink with the same target
      (expressed as a POSIX path) is created instead.
    - Regular files are sent with `client.put()`, after which the local
      file's access and modification times are applied to the remote copy.
    - It does not check if either path is valid, points to a file, lives in
      an existing folder, etc.
    """
    if local_path.is_symlink():
        client.symlink(local_path.readlink().as_posix(), remote_loc)
        return

    client.put(local_path, remote_loc)

    # stat once so that both timestamps come from the same snapshot
    local_stat = local_path.stat()
    client.utime(remote_loc, times=(local_stat.st_atime, local_stat.st_mtime))
def rglob(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """Walk a remote directory tree and list everything inside it

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        The attributes of all files, folders and symlinks found under the
        specified path

    Notes
    -----
    - The paths returned are *absolute*
    - The search is performed depth-first: each directory is immediately
      followed by its own contents
    - The `filename` of every returned attributes object is rewritten to the
      full remote path of that entry
    """
    SYNC_LOGGER.debug(f"ls {path}")

    found: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for entry in client.listdir_attr(path):
        full_remote_path = posixpath.join(path, entry.filename)
        entry.filename = full_remote_path
        found.append((Path(url2pathname(full_remote_path)), entry))

        if not stat.S_ISDIR(entry.st_mode or 0):
            continue
        # descend right away so children directly follow their parent
        found += rglob(client, full_remote_path)
    return found
def get_contents(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """List everything under a remote directory, keyed by relative path

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        The attributes of all files, folders and symlinks found under the
        specified path

    Notes
    -----
    - This list is generated via a depth-first search so that all parent
      directories appear before their children
    - The paths returned are relative to the provided path
    """
    root = url2pathname(path)

    relative_listing: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for absolute_path, attributes in rglob(client, path):
        relative_listing.append((absolute_path.relative_to(root), attributes))
    return relative_listing
235def pull(
236 remote_uri: ParseResult,
237 local_path: Path,
238 exclude: Collection[str],
239 dry_run: bool,
240 timeout: float | None = None,
241 delete: bool = True,
242 **unsupported_kwargs,
243) -> None:
244 """Sync an upstream file or folder into the specified location SFTP.
245 This will overwrite any files and folders already at the destination.
247 Parameters
248 ----------
249 remote_uri : ParseResult
250 The URI for the remote resource to copy from
251 local_path : Path
252 The destination folder
253 exclude : list of str
254 Any patterns that should be excluded from the sync
255 dry_run : bool
256 Whether to only simulate this sync (report the operations to be performed
257 but not actually perform them)
258 timeout : float, optional
259 The number of seconds to wait before timing out the sync operation.
260 If None is provided, no explicit timeout value will be set.
261 delete : bool
262 Whether part of the syncing should include deleting files at the destination
263 that aren't at the source. Default is True.
264 **unsupported_kwargs
265 Any other provided options will be ignored
267 Raises
268 ------
269 FileNotFoundError
270 If the destination folder does not exist, or if the remote path
271 does not exist
272 OSError
273 If the remote path cannot be accessed for any other reason (permissions,
274 most likely)
276 Notes
277 -----
278 - If the destination folder does not already exist, this method will not
279 create it or its parent directories.
280 """
281 if not local_path.exists():
282 raise FileNotFoundError(f"{local_path} does not exist")
283 if unsupported_kwargs:
284 SYNC_LOGGER.debug(
285 "The following command-line options are ignored for this protocol:\n%s",
286 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()),
287 )
289 remote_loc = posixpath.normpath(unquote(remote_uri.path))
290 destination_path = local_path / posixpath.basename(remote_loc)
292 if destination_path.is_symlink() and not destination_path.is_dir():
293 SYNC_LOGGER.warning("Removing symlink %s", destination_path)
294 if not dry_run:
295 destination_path.unlink()
296 else:
297 SYNC_LOGGER.debug(
298 "And replacing it entirely with the remote's %s", remote_loc
299 )
300 return
301 elif destination_path.exists() and not destination_path.is_dir():
302 SYNC_LOGGER.warning("Deleting file %s", destination_path)
303 if not dry_run:
304 destination_path.unlink()
305 else:
306 SYNC_LOGGER.debug(
307 "And replacing it entirely with the remote's %s", remote_loc
308 )
309 return
311 with connect(uri=remote_uri, timeout=timeout) as remote:
312 try:
313 source_target = remote.lstat(remote_loc)
314 except OSError as bad_target:
315 raise type(bad_target)(
316 f"Could not access {remote_loc} on remote: {bad_target}"
317 )
318 if not stat.S_ISDIR(source_target.st_mode or 0):
319 if destination_path.exists() and is_identical(
320 source_target, destination_path.stat()
321 ):
322 SYNC_LOGGER.warning(
323 "Remote file matches %s. No transfer needed.",
324 destination_path,
325 )
326 return
327 SYNC_LOGGER.debug(
328 "Downloading file %s from remote",
329 destination_path,
330 )
331 if not dry_run:
332 download_file(
333 remote,
334 remote_loc,
335 destination_path,
336 source_target,
337 )
338 return
340 if not destination_path.exists():
341 SYNC_LOGGER.debug(
342 "Downloading the entire contents of the remote's %s", remote_loc
343 )
344 if dry_run:
345 return
346 destination_path.mkdir()
348 source_contents = filter_contents(
349 get_contents(remote, remote_loc),
350 exclude,
351 prefix=remote_loc,
352 )
353 destination_contents = filter_contents(
354 file.get_contents(destination_path),
355 exclude,
356 prefix=destination_path,
357 )
359 sync_diff = diff(source_contents, destination_contents)
361 if dry_run:
362 generate_sync_report(sync_diff)
363 return
365 ignore = file.ignore_patterns(*exclude)
366 for path, path_stat, operation in sync_diff:
367 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)):
368 case (Op.CREATE, True):
369 SYNC_LOGGER.debug("Creating directory %s", destination_path / path)
370 (destination_path / path).mkdir(parents=True, exist_ok=True)
371 case (Op.CREATE, False) | (Op.REPLACE, False):
372 SYNC_LOGGER.debug(
373 "Downloading file %s from remote",
374 destination_path / path,
375 )
376 (destination_path / path).unlink(missing_ok=True)
377 download_file(
378 remote,
379 posixpath.join(remote_loc, path.as_posix()),
380 destination_path / path,
381 path_stat, # type: ignore[arg-type]
382 )
383 case (Op.DELETE, True):
384 # recall that for deletions, it's the *destination's* stats
385 if delete:
386 file.clean(destination_path / path, ignore, dry_run)
387 case (Op.DELETE, False):
388 SYNC_LOGGER.debug("Deleting file %s", destination_path / path)
389 if delete:
390 (destination_path / path).unlink()
391 case op, is_dir: # pragma: no cover
392 raise NotImplementedError(
393 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}"
394 )
397def push(
398 local_path: Path,
399 remote_uri: ParseResult,
400 exclude: Collection[str],
401 dry_run: bool,
402 timeout: float | None = None,
403 delete: bool = True,
404 **unsupported_kwargs,
405) -> None:
406 """Sync a local file or folder into the specified location using SFTP.
407 This will overwrite any files and folders already at the destination.
409 Parameters
410 ----------
411 local_path : Path
412 The file or folder to copy
413 remote_uri : ParseResult
414 The URI for the remote location to copy into
415 exclude : list of str
416 Any patterns that should be excluded from the sync
417 dry_run : bool
418 Whether to only simulate this sync (report the operations to be performed
419 but not actually perform them)
420 timeout : float, optional
421 The number of seconds to wait before timing out the sync operation.
422 If None is provided, no explicit timeout value will be set.
423 delete : bool, optional
424 Whether part of the syncing should include deleting files at the destination
425 that aren't at the source. Default is True.
426 **unsupported_kwargs
427 Any other provided options will be ignored
429 Notes
430 -----
431 - If the destination folder does not already exist, this method will not
432 create it or its parent directories.
433 """
434 if not local_path.exists():
435 raise FileNotFoundError(f"{local_path} does not exist.")
436 if unsupported_kwargs:
437 SYNC_LOGGER.debug(
438 "The following command-line options are ignored for this protocol:\n%s",
439 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()),
440 )
442 remote_parent = posixpath.normpath(unquote(remote_uri.path))
444 with connect(uri=remote_uri, timeout=timeout) as remote:
445 try:
446 remote_folder_stat = remote.lstat(remote_parent)
447 except OSError as bad_target:
448 raise type(bad_target)(
449 f"Could not access {remote_parent} on remote: {bad_target}"
450 )
451 if not stat.S_ISDIR(remote_folder_stat.st_mode or 0):
452 raise NotADirectoryError(f"{remote_parent} on remote is not a directory.")
454 remote_loc = posixpath.join(remote_parent, local_path.name)
455 try:
456 target_stat = remote.lstat(remote_loc)
457 except FileNotFoundError:
458 target_stat = None
459 if not stat.S_ISDIR(local_path.stat().st_mode or 0):
460 if target_stat and is_identical(local_path.stat(), target_stat):
461 SYNC_LOGGER.warning("Remote file matches %s", local_path)
462 return
464 SYNC_LOGGER.debug(
465 "Uploading file %s to remote",
466 local_path,
467 )
468 if not dry_run:
469 upload_file(remote, local_path, remote_loc)
470 return
471 if not target_stat:
472 SYNC_LOGGER.debug("Uploading the entire contents %s", local_path)
473 if dry_run:
474 return
475 remote.mkdir(remote_loc)
476 elif not stat.S_ISDIR(target_stat.st_mode or 0):
477 SYNC_LOGGER.warning(
478 "Deleting remote file or symlink %s",
479 remote_loc,
480 )
481 if dry_run:
482 SYNC_LOGGER.debug("And replacing it entirely with %s", local_path)
483 return
484 remote.remove(remote_loc)
485 remote.mkdir(remote_loc)
487 source_contents = filter_contents(
488 file.get_contents(local_path), exclude, prefix=local_path
489 )
490 destination_contents = filter_contents(
491 get_contents(remote, remote_loc),
492 exclude,
493 prefix=remote_loc,
494 )
496 sync_diff = diff(source_contents, destination_contents)
498 if dry_run:
499 generate_sync_report(sync_diff)
500 return
502 for path, path_stat, operation in sync_diff:
503 posix_path = posixpath.join(remote_loc, path.as_posix())
504 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)):
505 case (Op.CREATE, True):
506 SYNC_LOGGER.debug("Creating remote directory %s", posix_path)
507 remote.mkdir(posix_path)
508 case (Op.CREATE, False) | (Op.REPLACE, False):
509 SYNC_LOGGER.debug(
510 "Uploading file %s to remote",
511 local_path / path,
512 )
513 try:
514 remote.remove(posix_path)
515 except FileNotFoundError:
516 pass
517 upload_file(
518 remote,
519 local_path / path,
520 posix_path,
521 )
522 case (Op.DELETE, True):
523 # recall that for deletions, it's the *destination's* stats
524 if delete:
525 remote.rmdir(posix_path)
526 case (Op.DELETE, False):
527 if delete:
528 SYNC_LOGGER.debug("Deleting remote file %s", posix_path)
529 remote.remove(posix_path)
530 case op, is_dir: # pragma: no cover
531 raise NotImplementedError(
532 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}"
533 )