Coverage for enderchest/sync/rsync.py: 87%
138 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-30 12:06 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-30 12:06 +0000
1"""rsync sync implementation. Relies on the user having rsync installed on their system"""
3import os.path
4import re
5import shutil
6import subprocess
7from collections import defaultdict
8from collections.abc import Iterable
9from pathlib import Path
10from urllib.parse import ParseResult, unquote
12from . import SYNC_LOGGER, get_default_netloc, uri_to_ssh
# Locate the rsync executable once, at import time. Importing this module
# fails outright when no rsync can be found on the PATH.
RSYNC = shutil.which("rsync")
if RSYNC is None:  # pragma: no cover
    raise RuntimeError(
        "No rsync executable found on your system. Cannot sync using this protocol."
    )
def _get_rsync_version() -> tuple[int, int]:
    """Determine the installed version of Rsync

    Runs `rsync --version` and parses the first line of its output.

    Returns
    -------
    int
        The major version of the Rsync executable
    int
        The minor version of the Rsync executable

    Raises
    ------
    RuntimeError
        If Rsync is not installed or cannot be launched, if `rsync --version`
        writes anything to stderr, or if the version information cannot be
        decoded from the `rsync --version` output
    """
    try:
        result = subprocess.run(
            ["rsync", "--version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except OSError as launch_error:
        # covers a missing executable as well as one that cannot be executed
        raise RuntimeError(
            "Rsync is not installed or could not be executed."
        ) from launch_error

    if result.stderr:  # TODO: #124 just use check=True
        raise RuntimeError(result.stderr.decode("utf-8"))

    output_lines = result.stdout.decode("utf-8").splitlines()
    if not output_lines:
        # the command ran but printed nothing we could inspect
        raise RuntimeError("Rsync is not installed or could not be executed.")
    head = output_lines[0]

    # Only major and minor are needed. The dot is escaped so that it cannot
    # match an arbitrary character and mis-split a run of digits.
    match = re.match(r"^rsync\s+version ([0-9]+)\.([0-9]+)", head)
    if match is None:
        raise RuntimeError(f"Could not parse version output:\n{head}")

    major, minor = match.groups()
    return int(major), int(minor)
# Check the installed rsync version at import time and refuse to load this
# module when it is older than the minimum named in the error message below.
rsync_version = _get_rsync_version()
if rsync_version < (3, 2):
    raise RuntimeError(
        "EnderChest requires Rsync 3.2 or newer."
        " The version detected on your system is {}.{}".format(*rsync_version)
    )
def run_rsync(
    working_directory: Path,
    source: str,
    destination_folder: str,
    delete: bool,
    dry_run: bool,
    exclude: Iterable[str],
    *additional_args: str,
    timeout: int | None = None,
    verbosity: int = 0,
    rsync_flags: str | None = None,
) -> None:
    """Run an operation with rsync

    Parameters
    ----------
    working_directory : Path
        The working directory to run the sync command from
    source : str
        The source file or folder to sync, specified as either a URI string,
        an ssh address or a path relative to the working directory
    destination_folder : str
        The destination folder where the file or folder should be synced to,
        with the same formats available as for source
    delete : bool
        Whether part of the syncing should include deleting files at the
        destination that aren't at the source
    dry_run : bool
        Whether to only simulate this sync (report the operations to be
        performed but not actually perform them)
    exclude : list of str
        Any patterns that should be excluded from the sync
    *additional_args : str
        Any additional arguments to pass into the rsync command
    timeout : int, optional
        The number of seconds to wait before timing out the sync operation.
        If None (or 0) is provided, no explicit timeout value will be set.
    verbosity : int
        A modifier for how much info to output either to stdout or the
        INFO-level logs. At...

        - verbosity = -2 : No information will be printed, even on dry runs
        - verbosity = -1 : The sync itself will be silent. Dry runs will only
                           report the sync statistics.
        - verbosity =  0 : Actual syncs will display a progress bar. Dry run
                           reports will summarize the changes to each shulker
                           box in addition to reporting the sync statistics.
        - verbosity =  1 : Actual syncs will report the progress of each file
                           transfer. Dry runs will report on each file to
                           be created, updated or deleted.
        - verbosity =  2 : Dry runs and syncs will print or log the output
                           of rsync run using the `-vv` modifier

        Verbosity values outside of this range will simply be capped / floored
        to [-2, 2].
    rsync_flags : str, optional
        By default, rsync will be run using the flags "shaz" which means:

        - no space splitting
        - make output (file sizes, mostly) human-readable
        - archive mode (see: https://www.baeldung.com/linux/rsync-archive-mode)
        - compress data during transfer

        Advanced users may choose to override these options, but **you do so
        at your own peril**.

    Raises
    ------
    TimeoutError
        If the rsync operation times out before completion
    FileNotFoundError
        If rsync reports "No such file or directory" on stderr
    RuntimeError
        If the rsync operation fails for any other reason (anything else
        written to stderr, or a non-zero exit status)

    Notes
    -----
    This method does not perform any validation or normalization of the source,
    destination, exclude-list, additional arguments or rsync options.
    """
    rsync_flags = rsync_flags or "shaz"
    # honor the documented contract: out-of-range values are capped / floored
    verbosity = max(-2, min(2, verbosity))

    args: list[str] = [RSYNC, f"-{rsync_flags}"]  # type: ignore[list-item]
    if delete:
        args.append("--delete")
    if dry_run:
        args.extend(("--dry-run", "--stats"))
        if verbosity < 1:
            # at 1+ we don't need it to be machine-parseable
            args.append("--out-format=%i %n")
    else:
        if verbosity >= 0:
            args.append("--stats")
        if verbosity == 0:
            args.append("--info=progress2")
        if verbosity >= 1:
            args.append("--progress")
    if verbosity > 0:
        args.append("-" + "v" * verbosity)

    for pattern in exclude:
        args.extend(("--exclude", pattern))
    args.extend(additional_args)
    args.extend((source, destination_folder))

    SYNC_LOGGER.debug(
        "Executing the following command:\n  %s",
        " ".join(args),
    )

    with subprocess.Popen(
        args,
        stdout=subprocess.PIPE if dry_run else None,
        stderr=subprocess.PIPE,
        cwd=working_directory,
    ) as proc:
        # communicate() drains both pipes while waiting. Calling wait() or
        # reading one pipe to EOF before the other can stall forever (or
        # trip the timeout spuriously) once the child fills a pipe buffer.
        try:
            raw_output, raw_errors = proc.communicate(timeout=timeout or None)
        except subprocess.TimeoutExpired as times_up:
            proc.kill()
            # collect whatever the process managed to emit before it was killed
            raw_output, raw_errors = proc.communicate()
            if raw_output:
                SYNC_LOGGER.warning(raw_output.decode("UTF-8"))
            if raw_errors:
                SYNC_LOGGER.error(raw_errors.decode("UTF-8"))
            raise TimeoutError("Timeout reached.") from times_up
        return_code = proc.returncode

    # stdout is only captured on dry runs; otherwise raw_output is None
    if raw_output:
        output_log = raw_output.decode("UTF-8")
        if verbosity > 0:
            dry_run_output = output_log.splitlines()
        else:
            dry_run_output = summarize_rsync_report(output_log)
        SYNC_LOGGER.info("\nSUMMARY\n-------")
        for line in dry_run_output:
            if _is_important_stats_line(line):
                # 25 sits between logging.INFO (20) and logging.WARNING (30)
                SYNC_LOGGER.log(25, line)
            else:
                SYNC_LOGGER.debug(line)

    if raw_errors:
        error_log = raw_errors.decode("UTF-8")
        if "No such file or directory" in error_log:
            raise FileNotFoundError(error_log)
        raise RuntimeError(error_log)  # pragma: no cover

    if return_code:  # pragma: no cover
        # a failure that left nothing on stderr must still not pass silently
        raise RuntimeError(f"rsync exited with status {return_code}")
def summarize_rsync_report(raw_output: str, depth: int = 2) -> list[str]:
    """Take the captured output from running
    `rsync -ha --out-format="%i %n"`
    and report a high-level summary to the logging.INFO level

    Parameters
    ----------
    raw_output : str
        The raw output captured from running the rsync command
    depth : int, optional
        How many directories to go down from the root to generate the summary.
        Default is 2 (just report on top-level files and folders within the
        source folder).

    Returns
    -------
    list of str
        Any lines that weren't part of the rsync report (and were probably
        part of `--stats`?)

    Notes
    -----
    The rsync man page (https://linux.die.net/man/1/rsync) describes the output
    format as... "cryptic," which I find rather charitable. The relevant bits
    are that `--out-format="%i %n"` produces:
      - `%i` : a string of 11 characters that gives various metadata about the
               file transfer operation (is it a file, a directory or a link?
               Is it being sent or received? Created, updated or deleted?)
      - `%n`: the path of the file (or whatever), unquoted, un-escaped

    Empty and whitespace-only lines are ignored.
    """
    # each key maps either to per-operation counts for the entries beneath it
    # or, when the key itself is the thing being changed, to the operation name
    summary: dict[str, dict[str, int] | str] = defaultdict(
        lambda: {"create": 0, "update": 0, "delete": 0}
    )
    stats: list[str] = []

    for line in raw_output.splitlines():
        # maxsplit=1 keeps any whitespace inside the file name intact
        tokens = line.split(maxsplit=1)
        if not tokens:  # skip empty and whitespace-only lines
            continue

        info = tokens[0]
        full_path = os.path.normpath(tokens[1] if len(tokens) > 1 else "")
        path_key = os.sep.join(full_path.split(os.sep)[:depth])

        if info.startswith("*deleting"):
            operation = "delete"
        elif info[2:5] == "+++":  # this is a creation
            operation = "create"
        elif info[:2] in ("<f", ">f", "cL"):
            # a file transfer or (as far as I can tell) a link replacement;
            # creates were caught above, so this must be an update
            operation = "update"
        elif info[:1] == ".":  # pragma: no cover
            # this just means permissions or dates are being updated or something
            SYNC_LOGGER.debug(line)
            continue
        else:  # then hopefully this is part of the stats report
            stats.append(line)
            continue

        SYNC_LOGGER.debug(line)

        if full_path == path_key:
            # the summarized entry itself is what's being changed
            summary[path_key] = operation
            continue
        if operation == "create" and info[1] == "d":
            continue  # don't count directories
        entry = summary[path_key]
        if not isinstance(entry, str):
            entry[operation] += 1
        # otherwise this is already described by the top-level operation

    for path_key, report in sorted(summary.items()):
        if isinstance(report, str):
            # nice that these verbs follow the same pattern
            SYNC_LOGGER.info(f"{report[:-1].title()}ing %s", path_key)
        else:
            SYNC_LOGGER.info(
                "Within %s...\n%s",
                path_key,
                "\n".join(
                    f"  - {op[:-1].title()}ing {count} file{'' if count == 1 else 's'}"
                    for op, count in report.items()
                ),
            )
    return stats
def _is_important_stats_line(line: str) -> bool:
    """Decide whether a stats line deserves INFO-level visibility or should
    be relegated to the DEBUG log

    Parameters
    ----------
    line : str
        The log line to evaluate

    Returns
    -------
    bool
        True if and only if the line begins with one of the headline
        statistics listed below
    """
    headline_prefixes = (
        "Number of created files:",
        "Number of deleted files:",
        "Number of regular files transferred:",
        "Total transferred file size:",
    )
    return any(line.startswith(prefix) for prefix in headline_prefixes)
351def pull(
352 remote_uri: ParseResult,
353 local_path: Path,
354 exclude: Iterable[str],
355 dry_run: bool,
356 use_daemon: bool = False,
357 timeout: int | None = None,
358 delete: bool = True,
359 verbosity: int = 0,
360 rsync_args: Iterable[str] | None = None,
361) -> None:
362 """Sync an upstream file or folder into the specified location using rsync.
363 This will overwrite any files and folders already at the destination.
365 Parameters
366 ----------
367 remote_uri : ParseResult
368 The URI for the remote resource to copy from
369 local_path : Path
370 The destination folder
371 exclude : list of str
372 Any patterns that should be excluded from the sync
373 dry_run : bool
374 Whether to only simulate this sync (report the operations to be performed
375 but not actually perform them)
376 use_daemon : bool, optional
377 By default, the rsync is performed over ssh. If you happen to have an
378 rsync daemon running on your system, however, you're welcome to leverage
379 it instead by passing in `use_daemon=True`
380 timeout : int, optional
381 The number of seconds to wait before timing out the sync operation.
382 If None is provided, no explicit timeout value will be set.
383 delete : bool, optional
384 Whether part of the syncing should include deleting files at the destination
385 that aren't at the source. Default is True.
386 verbosity : int
387 A modifier for how much info to output either to stdout or the INFO-level
388 logs. Defaults to 0.
389 rsync_args: list of str, optional
390 Any additional arguments to pass into rsync. Note that rsync is run by
391 default with the flags: `-shaz`
393 Raises
394 ------
395 FileNotFoundError
396 If the destination folder does not exist
398 Notes
399 -----
400 - This method does not provide for interactive authentication. If using
401 rsync over SSH, you'll need to be set up for password-less (key-based)
402 access.
403 - If the destination folder does not already exist, this method will not
404 create it or its parent directories.
405 """
406 if not local_path.exists():
407 raise FileNotFoundError(f"{local_path} does not exist")
409 if remote_uri.netloc == get_default_netloc():
410 SYNC_LOGGER.debug("Performing sync as a local transfer")
411 remote_path: str = unquote(remote_uri.path)
412 elif use_daemon:
413 remote_path = remote_uri.geturl()
414 else:
415 remote_path = uri_to_ssh(remote_uri)
417 if rsync_args: # pragma: no cover
418 raise NotImplementedError
420 run_rsync(
421 local_path.parent,
422 remote_path,
423 local_path.name,
424 delete,
425 dry_run,
426 exclude,
427 *(rsync_args or ()),
428 timeout=timeout,
429 verbosity=verbosity,
430 )
def push(
    local_path: Path,
    remote_uri: ParseResult,
    exclude: Iterable[str],
    dry_run: bool,
    use_daemon: bool = False,
    timeout: int | None = None,
    delete: bool = True,
    verbosity: int = 0,
    rsync_args: Iterable[str] | None = None,
) -> None:
    """Use rsync to send a local file or folder to the specified remote
    location, overwriting any files and folders already at the destination.

    Parameters
    ----------
    local_path : Path
        The file or folder to copy
    remote_uri : ParseResult
        The URI for the remote location to copy into
    exclude : list of str
        Any patterns that should be excluded from the sync
    dry_run : bool
        Whether to only simulate this sync (report the operations to be
        performed but not actually perform them)
    use_daemon : bool, optional
        By default, the rsync is performed over ssh. Pass `use_daemon=True`
        to hand the remote URI to rsync as-is instead (for use with an rsync
        daemon).
    timeout : int, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.
    delete : bool, optional
        Whether part of the syncing should include deleting files at the
        destination that aren't at the source. Default is True.
    verbosity : int
        A modifier for how much info to output either to stdout or the
        INFO-level logs. Defaults to 0.
    rsync_args : list of str, optional
        Any additional arguments to pass into rsync. Note that rsync is run by
        default with the flags: `-shaz`

    Raises
    ------
    NotImplementedError
        If any `rsync_args` are provided

    Notes
    -----
    - This method does not provide for interactive authentication. If using
      rsync over SSH, you'll need to be set up for password-less (key-based)
      access.
    - If the destination folder does not already exist, this method will very
      likely fail.
    """
    syncing_locally = remote_uri.netloc == get_default_netloc()
    if syncing_locally:
        SYNC_LOGGER.debug("Performing sync as a local transfer")
        destination: str = unquote(remote_uri.path)
    else:
        # daemon transfers take the URI verbatim; otherwise go over ssh
        destination = (
            remote_uri.geturl() if use_daemon else uri_to_ssh(remote_uri)
        )

    if rsync_args:  # pragma: no cover
        raise NotImplementedError

    extra_args = tuple(rsync_args or ())
    # run from the parent folder so the source can be given by name
    run_rsync(
        local_path.parent,
        local_path.name,
        destination,
        delete,
        dry_run,
        exclude,
        *extra_args,
        timeout=timeout,
        verbosity=verbosity,
    )