Coverage for enderchest/sync/sftp.py: 90%
181 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-28 20:32 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-28 20:32 +0000
1"""paramiko-based sftp sync implementation"""
3import os
4import posixpath
5import stat
6from collections.abc import Collection, Generator
7from contextlib import contextmanager
8from pathlib import Path
9from typing import Any
10from urllib.parse import ParseResult, unquote
11from urllib.request import url2pathname
13import paramiko
15from ..prompt import prompt
16from . import (
17 SYNC_LOGGER,
18 Op,
19 diff,
20 file,
21 filter_contents,
22 generate_sync_report,
23 is_identical,
24)
@contextmanager
def connect(
    uri: ParseResult, timeout: float | None = None
) -> Generator[paramiko.sftp_client.SFTPClient, None, None]:
    """Yield an SFTPClient connected to the server specified by the given URI

    Key-based (passwordless) authentication is attempted first. If the server
    rejects it, the user is prompted for a password and the connection is
    retried once.

    Parameters
    ----------
    uri : ParseResult
        The URI of the EnderChest to connect to. A missing hostname is
        treated as "localhost" and a missing port as 22.
    timeout : float, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.

    Yields
    ------
    SFTPClient
        A Paramiko SFTP client connected to the specified server

    Raises
    ------
    ValueError
        If authentication still fails after the password prompt

    Notes
    -----
    - Any other error raised by Paramiko while connecting (for example
      because the server cannot be reached) is not translated and propagates
      to the caller unchanged.
    - Both the SFTP session and the underlying SSH connection are closed on
      exit, including when connecting fails or the body of the ``with``
      block raises.
    """
    ssh_client = paramiko.client.SSHClient()
    # NOTE(review): AutoAddPolicy accepts host keys that have not been seen
    # before without asking -- confirm this trade-off is intended.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    hostname = uri.hostname or "localhost"
    connect_kwargs: dict[str, Any] = {
        "port": uri.port or 22,
        "username": uri.username,
    }
    if timeout is not None:
        connect_kwargs["timeout"] = timeout

    # The outer try/finally guarantees the SSH client is released no matter
    # where a failure happens (connecting, opening the SFTP channel, or
    # inside the caller's with-block).
    try:
        try:
            # note: passing in password is explicitly unsupported
            ssh_client.connect(hostname, **connect_kwargs)
        except paramiko.AuthenticationException:
            target = (f"{uri.username}@" if uri.username else "") + hostname

            SYNC_LOGGER.warning(
                f"This machine is not set up for passwordless login to {target}"
                "\nFor instructions on setting up public key-based authentication,"
                " which is both"
                "\nmore convenient and more secure, see:"
                "\nhttps://openbagtwo.github.io/EnderChest"
                "/dev/suggestions/#passwordless-ssh-authentication"
            )
            password = prompt(
                f"Please enter the password for {target}", is_password=True
            )
            try:
                ssh_client.connect(hostname, password=password, **connect_kwargs)
            except paramiko.AuthenticationException as bad_login:
                raise ValueError(
                    "Authentication failed."
                    " Did you supply the correct username and password?"
                ) from bad_login

        sftp_client = ssh_client.open_sftp()
        try:
            yield sftp_client
        finally:
            sftp_client.close()
    finally:
        ssh_client.close()
def download_file(
    client: paramiko.sftp_client.SFTPClient,
    remote_loc: str,
    local_path: Path,
    remote_stat: paramiko.SFTPAttributes,
) -> None:
    """Download a file from a remote SFTP server and save it at the specified
    location.

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    remote_loc : str
        The POSIX path of the file to download
    local_path : Path
        The path to locally save the file
    remote_stat : stat-like
        The `os.stat_result`-like properties of the remote object

    Notes
    -----
    - This is a wrapper around `client.get()` that can handle symlinks and
      updating timestamps. It does not check if either path is valid, points
      to a file, lives in an existing folder, etc.
    - A remote symlink is recreated locally as a symlink pointing at the same
      target text; its contents are not downloaded and no timestamps are
      applied to it.
    - For regular files, the access and modification times are copied from
      `remote_stat` whenever both are present. A timestamp of 0 counts as
      present.
    """
    if stat.S_ISLNK(remote_stat.st_mode or 0):
        local_path.symlink_to(Path(client.readlink(remote_loc) or ""))
        return

    client.get(remote_loc, local_path)

    # Compare against None rather than relying on truthiness: 0 is a valid
    # timestamp and must not cause the times to be silently dropped.
    if remote_stat.st_atime is not None and remote_stat.st_mtime is not None:
        os.utime(
            local_path,
            times=(remote_stat.st_atime, remote_stat.st_mtime),
        )
def upload_file(
    client: paramiko.sftp_client.SFTPClient,
    local_path: Path,
    remote_loc: str,
) -> None:
    """Upload a local file to a remote SFTP server

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    local_path : Path
        The path of the file to upload
    remote_loc : str
        The POSIX path for the remote location to save the file

    Notes
    -----
    - This is just a wrapper around `client.put()` that can handle symlinks.
      It does not check if either path is valid, points to a file, lives in
      an existing folder, etc.
    - A local symlink is recreated remotely as a symlink whose target is the
      POSIX form of the local link's target; no file contents are sent and
      no timestamps are set for it.
    - For regular files, the remote access and modification times are set
      from the local file after the upload.
    """
    if local_path.is_symlink():
        client.symlink(local_path.readlink().as_posix(), remote_loc)
        return

    client.put(local_path, remote_loc)

    # Stat once so both timestamps come from the same snapshot of the file.
    local_stat = local_path.stat()
    client.utime(remote_loc, times=(local_stat.st_atime, local_stat.st_mtime))
def rglob(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """Walk a remote directory tree and list everything found beneath it

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        One entry for every file, folder and symlink found under the
        specified path

    Notes
    -----
    - The paths returned are *absolute*
    - The search is performed depth-first: a directory's entry is
      immediately followed by the entries found inside it
    - The `filename` of each returned attributes object is overwritten with
      the full remote path of the entry
    """
    SYNC_LOGGER.debug(f"ls {path}")
    found: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for entry in client.listdir_attr(path):
        full_name = posixpath.join(path, entry.filename)
        entry.filename = full_name
        found.append((Path(url2pathname(full_name)), entry))
        if not stat.S_ISDIR(entry.st_mode or 0):
            continue
        # descend right away so children directly follow their parent
        found += rglob(client, full_name)
    return found
def get_contents(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """List everything under a remote directory, keyed by relative path

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        One entry for every file, folder and symlink found under the
        specified path

    Notes
    -----
    - The entries come from `rglob`, which searches depth-first, so every
      parent directory is listed before its children
    - The paths returned are relative to the provided path
    """
    entries = rglob(client, path)
    if not entries:
        return []

    root = url2pathname(path)
    relative_entries: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for absolute_path, attributes in entries:
        relative_entries.append((absolute_path.relative_to(root), attributes))
    return relative_entries
236def pull(
237 remote_uri: ParseResult,
238 local_path: Path,
239 exclude: Collection[str],
240 dry_run: bool,
241 timeout: float | None = None,
242 delete: bool = True,
243 **unsupported_kwargs,
244) -> None:
245 """Sync an upstream file or folder into the specified location SFTP.
246 This will overwrite any files and folders already at the destination.
248 Parameters
249 ----------
250 remote_uri : ParseResult
251 The URI for the remote resource to copy from
252 local_path : Path
253 The destination folder
254 exclude : list of str
255 Any patterns that should be excluded from the sync
256 dry_run : bool
257 Whether to only simulate this sync (report the operations to be performed
258 but not actually perform them)
259 timeout : float, optional
260 The number of seconds to wait before timing out the sync operation.
261 If None is provided, no explicit timeout value will be set.
262 delete : bool
263 Whether part of the syncing should include deleting files at the destination
264 that aren't at the source. Default is True.
265 **unsupported_kwargs
266 Any other provided options will be ignored
268 Raises
269 ------
270 FileNotFoundError
271 If the destination folder does not exist, or if the remote path
272 does not exist
273 OSError
274 If the remote path cannot be accessed for any other reason (permissions,
275 most likely)
277 Notes
278 -----
279 - If the destination folder does not already exist, this method will not
280 create it or its parent directories.
281 """
282 if not local_path.exists():
283 raise FileNotFoundError(f"{local_path} does not exist")
284 if unsupported_kwargs:
285 SYNC_LOGGER.debug(
286 "The following command-line options are ignored for this protocol:\n%s",
287 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()),
288 )
290 remote_loc = posixpath.normpath(unquote(remote_uri.path))
291 destination_path = local_path / posixpath.basename(remote_loc)
293 if destination_path.is_symlink() and not destination_path.is_dir():
294 SYNC_LOGGER.warning("Removing symlink %s", destination_path)
295 if not dry_run:
296 destination_path.unlink()
297 else:
298 SYNC_LOGGER.debug(
299 "And replacing it entirely with the remote's %s", remote_loc
300 )
301 return
302 elif destination_path.exists() and not destination_path.is_dir():
303 SYNC_LOGGER.warning("Deleting file %s", destination_path)
304 if not dry_run:
305 destination_path.unlink()
306 else:
307 SYNC_LOGGER.debug(
308 "And replacing it entirely with the remote's %s", remote_loc
309 )
310 return
312 with connect(uri=remote_uri, timeout=timeout) as remote:
313 try:
314 source_target = remote.lstat(remote_loc)
315 except OSError as bad_target:
316 raise type(bad_target)(
317 f"Could not access {remote_loc} on remote: {bad_target}"
318 )
319 if not stat.S_ISDIR(source_target.st_mode or 0):
320 if destination_path.exists() and is_identical(
321 source_target, destination_path.stat()
322 ):
323 SYNC_LOGGER.warning(
324 "Remote file matches %s. No transfer needed.",
325 destination_path,
326 )
327 return
328 SYNC_LOGGER.debug(
329 "Downloading file %s from remote",
330 destination_path,
331 )
332 if not dry_run:
333 download_file(
334 remote,
335 remote_loc,
336 destination_path,
337 source_target,
338 )
339 return
341 if not destination_path.exists():
342 SYNC_LOGGER.debug(
343 "Downloading the entire contents of the remote's %s", remote_loc
344 )
345 if dry_run:
346 return
347 destination_path.mkdir()
349 source_contents = filter_contents(
350 get_contents(remote, remote_loc),
351 exclude,
352 prefix=remote_loc,
353 )
354 destination_contents = filter_contents(
355 file.get_contents(destination_path),
356 exclude,
357 prefix=destination_path,
358 )
360 sync_diff = diff(source_contents, destination_contents)
362 if dry_run:
363 generate_sync_report(sync_diff)
364 return
366 ignore = file.ignore_patterns(*exclude)
367 for path, path_stat, operation in sync_diff:
368 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)):
369 case (Op.CREATE, True):
370 SYNC_LOGGER.debug("Creating directory %s", destination_path / path)
371 (destination_path / path).mkdir(parents=True, exist_ok=True)
372 case (Op.CREATE, False) | (Op.REPLACE, False):
373 SYNC_LOGGER.debug(
374 "Downloading file %s from remote",
375 destination_path / path,
376 )
377 (destination_path / path).unlink(missing_ok=True)
378 download_file(
379 remote,
380 posixpath.join(remote_loc, path.as_posix()),
381 destination_path / path,
382 path_stat, # type: ignore[arg-type]
383 )
384 case (Op.DELETE, True):
385 # recall that for deletions, it's the *destination's* stats
386 if delete:
387 file.clean(destination_path / path, ignore, dry_run)
388 case (Op.DELETE, False):
389 SYNC_LOGGER.debug("Deleting file %s", destination_path / path)
390 if delete:
391 (destination_path / path).unlink()
392 case op, is_dir: # pragma: no cover
393 raise NotImplementedError(
394 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}"
395 )
398def push(
399 local_path: Path,
400 remote_uri: ParseResult,
401 exclude: Collection[str],
402 dry_run: bool,
403 timeout: float | None = None,
404 delete: bool = True,
405 **unsupported_kwargs,
406) -> None:
407 """Sync a local file or folder into the specified location using SFTP.
408 This will overwrite any files and folders already at the destination.
410 Parameters
411 ----------
412 local_path : Path
413 The file or folder to copy
414 remote_uri : ParseResult
415 The URI for the remote location to copy into
416 exclude : list of str
417 Any patterns that should be excluded from the sync
418 dry_run : bool
419 Whether to only simulate this sync (report the operations to be performed
420 but not actually perform them)
421 timeout : float, optional
422 The number of seconds to wait before timing out the sync operation.
423 If None is provided, no explicit timeout value will be set.
424 delete : bool, optional
425 Whether part of the syncing should include deleting files at the destination
426 that aren't at the source. Default is True.
427 **unsupported_kwargs
428 Any other provided options will be ignored
430 Notes
431 -----
432 - If the destination folder does not already exist, this method will not
433 create it or its parent directories.
434 """
435 if not local_path.exists():
436 raise FileNotFoundError(f"{local_path} does not exist.")
437 if unsupported_kwargs:
438 SYNC_LOGGER.debug(
439 "The following command-line options are ignored for this protocol:\n%s",
440 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()),
441 )
443 remote_parent = posixpath.normpath(unquote(remote_uri.path))
445 with connect(uri=remote_uri, timeout=timeout) as remote:
446 try:
447 remote_folder_stat = remote.lstat(remote_parent)
448 except OSError as bad_target:
449 raise type(bad_target)(
450 f"Could not access {remote_parent} on remote: {bad_target}"
451 )
452 if not stat.S_ISDIR(remote_folder_stat.st_mode or 0):
453 raise NotADirectoryError(f"{remote_parent} on remote is not a directory.")
455 remote_loc = posixpath.join(remote_parent, local_path.name)
456 try:
457 target_stat = remote.lstat(remote_loc)
458 except FileNotFoundError:
459 target_stat = None
460 if not stat.S_ISDIR(local_path.stat().st_mode or 0):
461 if target_stat and is_identical(local_path.stat(), target_stat):
462 SYNC_LOGGER.warning("Remote file matches %s", local_path)
463 return
465 SYNC_LOGGER.debug(
466 "Uploading file %s to remote",
467 local_path,
468 )
469 if not dry_run:
470 upload_file(remote, local_path, remote_loc)
471 return
472 if not target_stat:
473 SYNC_LOGGER.debug("Uploading the entire contents %s", local_path)
474 if dry_run:
475 return
476 remote.mkdir(remote_loc)
477 elif not stat.S_ISDIR(target_stat.st_mode or 0):
478 SYNC_LOGGER.warning(
479 "Deleting remote file or symlink %s",
480 remote_loc,
481 )
482 if dry_run:
483 SYNC_LOGGER.debug("And replacing it entirely with %s", local_path)
484 return
485 remote.remove(remote_loc)
486 remote.mkdir(remote_loc)
488 source_contents = filter_contents(
489 file.get_contents(local_path), exclude, prefix=local_path
490 )
491 destination_contents = filter_contents(
492 get_contents(remote, remote_loc),
493 exclude,
494 prefix=remote_loc,
495 )
497 sync_diff = diff(source_contents, destination_contents)
499 if dry_run:
500 generate_sync_report(sync_diff)
501 return
503 for path, path_stat, operation in sync_diff:
504 posix_path = posixpath.join(remote_loc, path.as_posix())
505 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)):
506 case (Op.CREATE, True):
507 SYNC_LOGGER.debug("Creating remote directory %s", posix_path)
508 remote.mkdir(posix_path)
509 case (Op.CREATE, False) | (Op.REPLACE, False):
510 SYNC_LOGGER.debug(
511 "Uploading file %s to remote",
512 local_path / path,
513 )
514 try:
515 remote.remove(posix_path)
516 except FileNotFoundError:
517 pass
518 upload_file(
519 remote,
520 local_path / path,
521 posix_path,
522 )
523 case (Op.DELETE, True):
524 # recall that for deletions, it's the *destination's* stats
525 if delete:
526 remote.rmdir(posix_path)
527 case (Op.DELETE, False):
528 if delete:
529 SYNC_LOGGER.debug("Deleting remote file %s", posix_path)
530 remote.remove(posix_path)
531 case op, is_dir: # pragma: no cover
532 raise NotImplementedError(
533 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}"
534 )