Coverage for enderchest/sync/sftp.py: 89%
180 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-06 16:00 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-06 16:00 +0000
1"""paramiko-based sftp sync implementation"""
2import os
3import posixpath
4import stat
5from contextlib import contextmanager
6from pathlib import Path
7from typing import Any, Collection, Generator
8from urllib.parse import ParseResult, unquote
9from urllib.request import url2pathname
11import paramiko
13from ..prompt import prompt
14from . import (
15 SYNC_LOGGER,
16 Op,
17 diff,
18 file,
19 filter_contents,
20 generate_sync_report,
21 is_identical,
22)
@contextmanager
def connect(
    uri: ParseResult, timeout: float | None = None
) -> Generator[paramiko.sftp_client.SFTPClient, None, None]:
    """Yield an SFTPClient connected to the server specified by the given URI

    Parameters
    ----------
    uri : ParseResult
        The URI of the EnderChest to connect to. Only the hostname, port and
        username are read; a missing hostname falls back to "localhost" and
        a missing port to 22.
    timeout : float, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.

    Yields
    ------
    SFTPClient
        A Paramiko SFTP client connected to the specified server

    Raises
    ------
    ValueError
        If authentication is still rejected after prompting for a password

    Notes
    -----
    - A first connection attempt is made without a password. Only if that
      raises an authentication error is the user prompted for one.
    - Failures other than authentication errors are not translated by this
      function: whatever `SSHClient.connect` / `open_sftp` raises propagates
      to the caller unchanged.
    - Both the SFTP channel and the underlying SSH client are closed when the
      context exits, whether normally or through an exception.
    """
    ssh_client = paramiko.client.SSHClient()
    # NOTE(review): AutoAddPolicy accepts host keys that have not been seen
    # before instead of rejecting them -- confirm this is the intended trust
    # model for the hosts being synced with.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    extra_kwargs: dict[str, Any] = {}
    if timeout is not None:
        extra_kwargs["timeout"] = timeout

    # The outer try/finally guarantees the SSH client is released even when
    # the connection attempt itself fails.
    try:
        try:
            ssh_client.connect(
                uri.hostname or "localhost",
                port=uri.port or 22,
                username=uri.username,
                # note: passing in password is explicitly unsupported
                **extra_kwargs,
            )
        except paramiko.AuthenticationException:
            target = ((uri.username + "@") if uri.username else "") + (
                uri.hostname or "localhost"
            )

            SYNC_LOGGER.warning(
                f"This machine is not set up for passwordless login to {target}"
                "\nFor instructions on setting up public key-based authentication,"
                " which is both"
                "\nmore convenient and more secure, see:"
                "\nhttps://openbagtwo.github.io/EnderChest"
                "/dev/suggestions/#passwordless-ssh-authentication"
            )
            password = prompt(
                f"Please enter the password for {target}", is_password=True
            )
            try:
                ssh_client.connect(
                    uri.hostname or "localhost",
                    port=uri.port or 22,
                    username=uri.username,
                    password=password,
                    **extra_kwargs,
                )
            except paramiko.AuthenticationException as bad_login:
                raise ValueError(
                    "Authentication failed."
                    " Did you supply the correct username and password?"
                ) from bad_login

        sftp_client = ssh_client.open_sftp()
        try:
            yield sftp_client
        finally:
            # close the SFTP channel even if the caller's block raised
            sftp_client.close()
    finally:
        ssh_client.close()
def download_file(
    client: paramiko.sftp_client.SFTPClient,
    remote_loc: str,
    local_path: Path,
    remote_stat: paramiko.SFTPAttributes,
) -> None:
    """Download a file from a remote SFTP server and save it at the specified
    location.

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    remote_loc : str
        The POSIX path of the file to download
    local_path : Path
        The path to locally save the file
    remote_stat : stat-like
        The `os.stat_result`-like properties of the remote object

    Notes
    -----
    - This is a wrapper around `client.get()` that can handle symlinks and
      updating timestamps. It does not check if either path is valid, points
      to a file, lives in an existing folder, etc.
    - A remote symlink is recreated locally as a symlink pointing at the
      same link target; its contents are not downloaded and no timestamps
      are applied to it.
    - For regular files the remote access and modification times are copied
      onto the local file whenever the remote reported both of them.
    """
    if stat.S_ISLNK(remote_stat.st_mode or 0):
        local_path.symlink_to(Path((client.readlink(remote_loc) or "")))
        return

    client.get(remote_loc, local_path)
    # Compare against None rather than relying on truthiness: a timestamp of
    # 0 (the epoch) is a legitimate value and must still be propagated.
    if remote_stat.st_atime is not None and remote_stat.st_mtime is not None:
        os.utime(
            local_path,
            times=(remote_stat.st_atime, remote_stat.st_mtime),
        )
def upload_file(
    client: paramiko.sftp_client.SFTPClient,
    local_path: Path,
    remote_loc: str,
) -> None:
    """Upload a local file to a remote SFTP server

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    local_path : Path
        The path of the file to upload
    remote_loc : str
        The POSIX path for the remote location to save the file

    Notes
    -----
    - This is just a wrapper around `client.put()` that can handle symlinks.
      It does not check if either path is valid, points to a file, lives in
      an existing folder, etc.
    - A local symlink is recreated remotely as a symlink to the same target
      (expressed as a POSIX path); no timestamps are applied to it.
    - For regular files the local access and modification times are copied
      onto the uploaded file.
    """
    if local_path.is_symlink():
        client.symlink(local_path.readlink().as_posix(), remote_loc)
        return

    client.put(local_path, remote_loc)
    # stat once so both timestamps come from the same snapshot
    local_stat = local_path.stat()
    client.utime(remote_loc, times=(local_stat.st_atime, local_stat.st_mtime))
def rglob(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """Walk a remote directory tree and list everything beneath it

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        One entry per file, folder and symlink found under the specified
        path, pairing its location with its attributes

    Notes
    -----
    - The paths returned are *absolute*
    - The search is performed depth-first: a directory's entry is
      immediately followed by the entries found inside it
    - Each returned attributes object has its `filename` overwritten with
      the absolute remote path
    """
    SYNC_LOGGER.debug(f"ls {path}")
    listing = client.listdir_attr(path)

    found: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for entry in listing:
        absolute = posixpath.join(path, entry.filename)
        entry.filename = absolute
        found.append((Path(url2pathname(absolute)), entry))
        if not stat.S_ISDIR(entry.st_mode or 0):
            continue
        # descend right away so children directly follow their parent
        found += rglob(client, absolute)
    return found
def get_contents(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """List everything under a remote directory, relative to that directory

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        The attributes of all files, folders and symlinks found under the
        specified path

    Notes
    -----
    - The entries come from `rglob` in the order it produced them, i.e. via
      a depth-first search so that all parent directories appear before
      their children
    - The paths returned are relative to the provided path
    """
    absolute_entries = rglob(client, path)
    return [
        (absolute_path.relative_to(url2pathname(path)), attributes)
        for absolute_path, attributes in absolute_entries
    ]
234def pull(
235 remote_uri: ParseResult,
236 local_path: Path,
237 exclude: Collection[str],
238 dry_run: bool,
239 timeout: float | None = None,
240 delete: bool = True,
241 **unsupported_kwargs,
242) -> None:
243 """Sync an upstream file or folder into the specified location SFTP.
244 This will overwrite any files and folders already at the destination.
246 Parameters
247 ----------
248 remote_uri : ParseResult
249 The URI for the remote resource to copy from
250 local_path : Path
251 The destination folder
252 exclude : list of str
253 Any patterns that should be excluded from the sync
254 dry_run : bool
255 Whether to only simulate this sync (report the operations to be performed
256 but not actually perform them)
257 timeout : float, optional
258 The number of seconds to wait before timing out the sync operation.
259 If None is provided, no explicit timeout value will be set.
260 delete : bool
261 Whether part of the syncing should include deleting files at the destination
262 that aren't at the source. Default is True.
263 **unsupported_kwargs
264 Any other provided options will be ignored
266 Raises
267 ------
268 FileNotFoundError
269 If the destination folder does not exist, or if the remote path
270 does not exist
271 OSError
272 If the remote path cannot be accessed for any other reason (permissions,
273 most likely)
275 Notes
276 -----
277 - If the destination folder does not already exist, this method will not
278 create it or its parent directories.
279 """
280 if not local_path.exists():
281 raise FileNotFoundError(f"{local_path} does not exist")
282 if unsupported_kwargs:
283 SYNC_LOGGER.debug(
284 "The following command-line options are ignored for this protocol:\n%s",
285 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()),
286 )
288 remote_loc = posixpath.normpath(unquote(remote_uri.path))
289 destination_path = local_path / posixpath.basename(remote_loc)
291 if destination_path.is_symlink() and not destination_path.is_dir():
292 SYNC_LOGGER.warning("Removing symlink %s", destination_path)
293 if not dry_run:
294 destination_path.unlink()
295 else:
296 SYNC_LOGGER.debug(
297 "And replacing it entirely with the remote's %s", remote_loc
298 )
299 return
300 elif destination_path.exists() and not destination_path.is_dir():
301 SYNC_LOGGER.warning("Deleting file %s", destination_path)
302 if not dry_run:
303 destination_path.unlink()
304 else:
305 SYNC_LOGGER.debug(
306 "And replacing it entirely with the remote's %s", remote_loc
307 )
308 return
310 with connect(uri=remote_uri, timeout=timeout) as remote:
311 try:
312 source_target = remote.lstat(remote_loc)
313 except OSError as bad_target:
314 raise type(bad_target)(
315 f"Could not access {remote_loc} on remote: {bad_target}"
316 )
317 if not stat.S_ISDIR(source_target.st_mode or 0):
318 if destination_path.exists() and is_identical(
319 source_target, destination_path.stat()
320 ):
321 SYNC_LOGGER.warning(
322 "Remote file matches %s. No transfer needed.",
323 destination_path,
324 )
325 return
326 SYNC_LOGGER.debug(
327 "Downloading file %s from remote",
328 destination_path,
329 )
330 if not dry_run:
331 download_file(
332 remote,
333 remote_loc,
334 destination_path,
335 source_target,
336 )
337 return
339 if not destination_path.exists():
340 SYNC_LOGGER.debug(
341 "Downloading the entire contents of the remote's %s", remote_loc
342 )
343 if dry_run:
344 return
345 destination_path.mkdir()
347 source_contents = filter_contents(
348 get_contents(remote, remote_loc),
349 exclude,
350 prefix=remote_loc,
351 )
352 destination_contents = filter_contents(
353 file.get_contents(destination_path),
354 exclude,
355 prefix=destination_path,
356 )
358 sync_diff = diff(source_contents, destination_contents)
360 if dry_run:
361 generate_sync_report(sync_diff)
362 return
364 ignore = file.ignore_patterns(*exclude)
365 for path, path_stat, operation in sync_diff:
366 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)):
367 case (Op.CREATE, True):
368 SYNC_LOGGER.debug("Creating directory %s", destination_path / path)
369 (destination_path / path).mkdir(parents=True, exist_ok=True)
370 case (Op.CREATE, False) | (Op.REPLACE, False):
371 SYNC_LOGGER.debug(
372 "Downloading file %s from remote",
373 destination_path / path,
374 )
375 (destination_path / path).unlink(missing_ok=True)
376 download_file(
377 remote,
378 posixpath.join(remote_loc, path.as_posix()),
379 destination_path / path,
380 path_stat, # type: ignore[arg-type]
381 )
382 case (Op.DELETE, True):
383 # recall that for deletions, it's the *destination's* stats
384 if delete:
385 file.clean(destination_path / path, ignore, dry_run)
386 case (Op.DELETE, False):
387 SYNC_LOGGER.debug("Deleting file %s", destination_path / path)
388 if delete:
389 (destination_path / path).unlink()
390 case op, is_dir: # pragma: no cover
391 raise NotImplementedError(
392 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}"
393 )
396def push(
397 local_path: Path,
398 remote_uri: ParseResult,
399 exclude: Collection[str],
400 dry_run: bool,
401 timeout: float | None = None,
402 delete: bool = True,
403 **unsupported_kwargs,
404) -> None:
405 """Sync a local file or folder into the specified location using SFTP.
406 This will overwrite any files and folders already at the destination.
408 Parameters
409 ----------
410 local_path : Path
411 The file or folder to copy
412 remote_uri : ParseResult
413 The URI for the remote location to copy into
414 exclude : list of str
415 Any patterns that should be excluded from the sync
416 dry_run : bool
417 Whether to only simulate this sync (report the operations to be performed
418 but not actually perform them)
419 timeout : float, optional
420 The number of seconds to wait before timing out the sync operation.
421 If None is provided, no explicit timeout value will be set.
422 delete : bool, optional
423 Whether part of the syncing should include deleting files at the destination
424 that aren't at the source. Default is True.
425 **unsupported_kwargs
426 Any other provided options will be ignored
428 Notes
429 -----
430 - If the destination folder does not already exist, this method will not
431 create it or its parent directories.
432 """
433 if not local_path.exists():
434 raise FileNotFoundError(f"{local_path} does not exist.")
435 if unsupported_kwargs:
436 SYNC_LOGGER.debug(
437 "The following command-line options are ignored for this protocol:\n%s",
438 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()),
439 )
441 remote_parent = posixpath.normpath(unquote(remote_uri.path))
443 with connect(uri=remote_uri, timeout=timeout) as remote:
444 try:
445 remote_folder_stat = remote.lstat(remote_parent)
446 except OSError as bad_target:
447 raise type(bad_target)(
448 f"Could not access {remote_parent} on remote: {bad_target}"
449 )
450 if not stat.S_ISDIR(remote_folder_stat.st_mode or 0):
451 raise NotADirectoryError(f"{remote_parent} on remote is not a directory.")
453 remote_loc = posixpath.join(remote_parent, local_path.name)
454 try:
455 target_stat = remote.lstat(remote_loc)
456 except FileNotFoundError:
457 target_stat = None
458 if not stat.S_ISDIR(local_path.stat().st_mode or 0):
459 if target_stat and is_identical(local_path.stat(), target_stat):
460 SYNC_LOGGER.warning("Remote file matches %s", local_path)
461 return
463 SYNC_LOGGER.debug(
464 "Uploading file %s to remote",
465 local_path,
466 )
467 if not dry_run:
468 upload_file(remote, local_path, remote_loc)
469 return
470 if not target_stat:
471 SYNC_LOGGER.debug("Uploading the entire contents %s", local_path)
472 if dry_run:
473 return
474 remote.mkdir(remote_loc)
475 elif not stat.S_ISDIR(target_stat.st_mode or 0):
476 SYNC_LOGGER.warning(
477 "Deleting remote file or symlink %s",
478 remote_loc,
479 )
480 if dry_run:
481 SYNC_LOGGER.debug("And replacing it entirely with %s", local_path)
482 return
483 remote.remove(remote_loc)
484 remote.mkdir(remote_loc)
486 source_contents = filter_contents(
487 file.get_contents(local_path), exclude, prefix=local_path
488 )
489 destination_contents = filter_contents(
490 get_contents(remote, remote_loc),
491 exclude,
492 prefix=remote_loc,
493 )
495 sync_diff = diff(source_contents, destination_contents)
497 if dry_run:
498 generate_sync_report(sync_diff)
499 return
501 for path, path_stat, operation in sync_diff:
502 posix_path = posixpath.join(remote_loc, path.as_posix())
503 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)):
504 case (Op.CREATE, True):
505 SYNC_LOGGER.debug("Creating remote directory %s", posix_path)
506 remote.mkdir(posix_path)
507 case (Op.CREATE, False) | (Op.REPLACE, False):
508 SYNC_LOGGER.debug(
509 "Uploading file %s to remote",
510 local_path / path,
511 )
512 try:
513 remote.remove(posix_path)
514 except FileNotFoundError:
515 pass
516 upload_file(
517 remote,
518 local_path / path,
519 posix_path,
520 )
521 case (Op.DELETE, True):
522 # recall that for deletions, it's the *destination's* stats
523 if delete:
524 remote.rmdir(posix_path)
525 case (Op.DELETE, False):
526 if delete:
527 SYNC_LOGGER.debug("Deleting remote file %s", posix_path)
528 remote.remove(posix_path)
529 case op, is_dir: # pragma: no cover
530 raise NotImplementedError(
531 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}"
532 )