Coverage for enderchest/sync/rsync.py: 87%

138 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-07-30 12:06 +0000

1"""rsync sync implementation. Relies on the user having rsync installed on their system""" 

2 

3import os.path 

4import re 

5import shutil 

6import subprocess 

7from collections import defaultdict 

8from collections.abc import Iterable 

9from pathlib import Path 

10from urllib.parse import ParseResult, unquote 

11 

12from . import SYNC_LOGGER, get_default_netloc, uri_to_ssh 

13 

# Locate the rsync binary once, at import time. Without it this sync protocol
# cannot work at all, so the module refuses to load.
if (RSYNC := shutil.which("rsync")) is None:  # pragma: no cover
    raise RuntimeError(
        "No rsync executable found on your system."
        " Cannot sync using this protocol."
    )

19 

20 

def _get_rsync_version() -> tuple[int, int]:
    """Determine the installed version of Rsync

    Returns
    -------
    int
        The major version of the Rsync executable found on the PATH
    int
        The minor version of the Rsync executable found on the PATH

    Raises
    ------
    RuntimeError
        If Rsync is not installed, if `rsync --version` writes anything to
        stderr, if it produces no output at all or if the version information
        cannot be decoded from the first line of the `rsync --version` output
    """
    try:
        result = subprocess.run(
            ["rsync", "--version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except FileNotFoundError as not_found:
        raise RuntimeError(
            "Rsync is not installed or could not be executed."
        ) from not_found

    if result.stderr:  # TODO: #124 just use check=True
        raise RuntimeError(result.stderr.decode("utf-8"))

    output_lines = result.stdout.decode("utf-8").splitlines()
    if not output_lines:
        # the command ran but told us nothing, so treat it as unusable
        raise RuntimeError("Rsync is not installed or could not be executed.")
    head = output_lines[0]

    # The dots are escaped so that only a genuine "major.minor.patch" triple
    # is accepted (an unescaped "." would match any separator character).
    match = re.match(r"^rsync\s+version ([0-9]+)\.([0-9]+)\.([0-9]+)", head)
    if match is None:
        raise RuntimeError(f"Could not parse version output:\n{head}")

    major, minor, _patch = match.groups()
    return int(major), int(minor)

62 

63 

# Version gate, evaluated at import time: anything older than rsync 3.2 is
# rejected outright.
rsync_version = _get_rsync_version()
if rsync_version < (3, 2):
    detected_major, detected_minor = rsync_version
    raise RuntimeError(
        "EnderChest requires Rsync 3.2 or newer."
        f" The version detected on your system is {detected_major}.{detected_minor}"
    )

70 

71 

def run_rsync(
    working_directory: Path,
    source: str,
    destination_folder: str,
    delete: bool,
    dry_run: bool,
    exclude: Iterable[str],
    *additional_args: str,
    timeout: int | None = None,
    verbosity: int = 0,
    rsync_flags: str | None = None,
) -> None:
    """Run an operation with rsync

    Parameters
    ----------
    working_directory : Path
        The working directory to run the sync command from
    source : str
        The source file or folder to sync, specified as either a URI string,
        an ssh address or a path relative to the working directory
    destination_folder : str
        The destination folder where the file or folder should be synced to,
        with the same formats available as for source
    delete : bool
        Whether part of the syncing should include deleting files at the
        destination that aren't at the source
    dry_run : bool
        Whether to only simulate this sync (report the operations to be
        performed but not actually perform them)
    exclude : list of str
        Any patterns that should be excluded from the sync
    *additional_args : str
        Any additional arguments to pass into the rsync command
    timeout : int, optional
        The number of seconds to wait before timing out the sync operation.
        If None (or 0) is provided, no explicit timeout value will be set.
    verbosity : int
        A modifier for how much info to output either to stdout or the
        INFO-level logs. At...

        - verbosity = -2 : No information will be printed, even on dry runs
        - verbosity = -1 : The sync itself will be silent. Dry runs will only
                           report the sync statistics.
        - verbosity =  0 : Actual syncs will display a progress bar. Dry run
                           reports will summarize the changes to each shulker
                           box in addition to reporting the sync statistics.
        - verbosity =  1 : Actual syncs will report the progress of each file
                           transfer. Dry runs will report on each file to
                           be created, updated or deleted.
        - verbosity =  2 : Dry runs and syncs will print or log the output
                           of rsync run using the `-vv` modifier

        Verbosity values outside of this range will simply be capped / floored
        to [-2, 2].
    rsync_flags : str, optional
        By default, rsync will be run using the flags "shaz" which means:

          - no space splitting
          - output numbers (file sizes, mostly) in a human-readable format
          - archive mode (see: https://www.baeldung.com/linux/rsync-archive-mode)
          - compress data during transfer

        Advanced users may choose to override these options, but **you do so
        at your own peril**.

    Raises
    ------
    TimeoutError
        If the rsync operation times out before completion
    FileNotFoundError
        If rsync reports "No such file or directory" on stderr
    RuntimeError
        If the rsync operation fails for any other reason (i.e. anything else
        is written to stderr)

    Notes
    -----
    This method does not perform any validation or normalization of the source,
    destination, exclude-list, additional arguments or rsync options.
    """
    rsync_flags = rsync_flags or "shaz"
    # honor the documented contract: out-of-range values are capped / floored
    verbosity = max(-2, min(verbosity, 2))

    args: list[str] = [RSYNC, f"-{rsync_flags}"]  # type: ignore[list-item]
    if delete:
        args.append("--delete")
    if dry_run:
        args.extend(("--dry-run", "--stats"))
        if verbosity < 1:
            # at 1+ we don't need it to be machine-parseable
            args.append("--out-format=%i %n")
    else:
        if verbosity >= 0:
            args.append("--stats")
        if verbosity == 0:
            args.append("--info=progress2")
        if verbosity >= 1:
            args.append("--progress")
    if verbosity > 0:
        args.append("-" + "v" * verbosity)

    for pattern in exclude:
        args.extend(("--exclude", pattern))
    args.extend(additional_args)
    args.extend((source, destination_folder))

    SYNC_LOGGER.debug(
        "Executing the following command:\n %s",
        " ".join(args),
    )

    with subprocess.Popen(
        args,
        stdout=subprocess.PIPE if dry_run else None,
        stderr=subprocess.PIPE,
        cwd=working_directory,
    ) as proc:
        # communicate() drains stdout and stderr concurrently while waiting.
        # Calling wait() on a process with piped output can deadlock once the
        # child fills a pipe buffer (e.g. a large dry-run listing), which
        # would then masquerade as a timeout.
        try:
            stdout_bytes, stderr_bytes = proc.communicate(timeout=timeout or None)
        except subprocess.TimeoutExpired as times_up:
            proc.kill()
            # collect whatever was produced before the process was killed
            stdout_bytes, stderr_bytes = proc.communicate()
            if stdout_bytes:
                SYNC_LOGGER.warning(stdout_bytes.decode("UTF-8"))
            if stderr_bytes:
                SYNC_LOGGER.error(stderr_bytes.decode("UTF-8"))
            raise TimeoutError("Timeout reached.") from times_up

        # stdout is only captured (i.e. not None) on dry runs
        if stdout_bytes:
            output_log = stdout_bytes.decode("UTF-8")
            if verbosity > 0:
                dry_run_output = output_log.splitlines()
            else:
                dry_run_output = summarize_rsync_report(output_log)
            SYNC_LOGGER.info("\nSUMMARY\n-------")
            for line in dry_run_output:
                if _is_important_stats_line(line):
                    SYNC_LOGGER.log(25, line)
                else:
                    SYNC_LOGGER.debug(line)

        # anything at all on stderr is treated as a failure
        if stderr_bytes:
            error_log = stderr_bytes.decode("UTF-8")
            if "No such file or directory" in error_log:
                raise FileNotFoundError(error_log)
            raise RuntimeError(error_log)  # pragma: no cover

217 

218 

def summarize_rsync_report(raw_output: str, depth: int = 2) -> list[str]:
    """Take the captured output from running
    `rsync -ha --out-format="%i %n"`
    and report a high-level summary to the logging.INFO level

    Parameters
    ----------
    raw_output : str
        The raw output captured from running the rsync command
    depth : int, optional
        How many directories to go down from the root to generate the summary.
        Default is 2 (just report on top-level files and folders within the
        source folder).

    Returns
    -------
    list of str
        Any lines that weren't part of the rsync report (and were probably
        part of `--stats`?)

    Notes
    -----
    The rsync man page (https://linux.die.net/man/1/rsync) describes the output
    format as... "cryptic," which I find rather charitable. The relevant bits
    are that `--out-format="%i %n"` produces:
      - `%i` : a string of 11 characters that gives various metadata about the
               file transfer operation (is it a file, a directory or a link?
               Is it being sent or received? Created, updated or deleted?)
      - `%n`: the path of the file (or whatever), unquoted, un-escaped

    Every recognized report line is also echoed at the DEBUG level. Empty and
    whitespace-only lines are ignored.
    """
    summary: dict[str, dict[str, int] | str] = defaultdict(
        lambda: {"create": 0, "update": 0, "delete": 0}
    )
    stats: list[str] = []
    for line in raw_output.splitlines():
        # Split off only the leading info token so that runs of whitespace
        # *inside* the (unquoted) path survive intact.
        fields = line.split(maxsplit=1)
        if not fields:  # skip empty and whitespace-only lines
            continue

        info = fields[0]
        full_path = os.path.normpath(fields[1] if len(fields) > 1 else "")
        path_key = os.sep.join(full_path.split(os.sep)[:depth])

        operation: str | None
        if info.startswith("*deleting"):
            operation = "delete"
        elif info[2:5] == "+++":  # this is a creation
            operation = "create"
        elif info[:2] in ("<f", ">f", "cL"):
            # a file transfer or (as far as I can tell) a link replacement;
            # creates were caught above, so this must be an update
            operation = "update"
        elif info[:1] == ".":
            # this just means permissions or dates are being updated or something
            operation = None
        else:  # then hopefully this is part of the stats report
            stats.append(line)
            continue

        SYNC_LOGGER.debug(line)
        if operation is None:
            continue

        if full_path == path_key:
            # the summarized path itself is what's being operated on
            summary[path_key] = operation
        elif operation == "create" and info[1] == "d":
            # don't count directories created beneath the summarized path
            continue
        else:
            entry = summary[path_key]
            if not isinstance(entry, str):
                entry[operation] += 1
            # otherwise this is already described by the top-level operation

    for path_key, report in sorted(summary.items()):
        if isinstance(report, str):
            # nice that these verbs follow the same pattern
            SYNC_LOGGER.info(f"{report[:-1].title()}ing %s", path_key)
        else:
            SYNC_LOGGER.info(
                "Within %s...\n%s",
                path_key,
                "\n".join(
                    f" - {op[:-1].title()}ing {count} file{'' if count == 1 else 's'}"
                    for op, count in report.items()
                ),
            )
    return stats

325 

326 

def _is_important_stats_line(line: str) -> bool:
    """Decide whether a line of rsync stats output deserves INFO-level
    visibility (as opposed to being relegated to the DEBUG log)

    Parameters
    ----------
    line : str
        The log line to evaluate

    Returns
    -------
    bool
        True if and only if the line begins with one of the headline
        statistics labels
    """
    headline_labels = (
        "Number of created files:",
        "Number of deleted files:",
        "Number of regular files transferred:",
        "Total transferred file size:",
    )
    return line.startswith(headline_labels)

349 

350 

351def pull( 

352 remote_uri: ParseResult, 

353 local_path: Path, 

354 exclude: Iterable[str], 

355 dry_run: bool, 

356 use_daemon: bool = False, 

357 timeout: int | None = None, 

358 delete: bool = True, 

359 verbosity: int = 0, 

360 rsync_args: Iterable[str] | None = None, 

361) -> None: 

362 """Sync an upstream file or folder into the specified location using rsync. 

363 This will overwrite any files and folders already at the destination. 

364 

365 Parameters 

366 ---------- 

367 remote_uri : ParseResult 

368 The URI for the remote resource to copy from 

369 local_path : Path 

370 The destination folder 

371 exclude : list of str 

372 Any patterns that should be excluded from the sync 

373 dry_run : bool 

374 Whether to only simulate this sync (report the operations to be performed 

375 but not actually perform them) 

376 use_daemon : bool, optional 

377 By default, the rsync is performed over ssh. If you happen to have an 

378 rsync daemon running on your system, however, you're welcome to leverage 

379 it instead by passing in `use_daemon=True` 

380 timeout : int, optional 

381 The number of seconds to wait before timing out the sync operation. 

382 If None is provided, no explicit timeout value will be set. 

383 delete : bool, optional 

384 Whether part of the syncing should include deleting files at the destination 

385 that aren't at the source. Default is True. 

386 verbosity : int 

387 A modifier for how much info to output either to stdout or the INFO-level 

388 logs. Defaults to 0. 

389 rsync_args: list of str, optional 

390 Any additional arguments to pass into rsync. Note that rsync is run by 

391 default with the flags: `-shaz` 

392 

393 Raises 

394 ------ 

395 FileNotFoundError 

396 If the destination folder does not exist 

397 

398 Notes 

399 ----- 

400 - This method does not provide for interactive authentication. If using 

401 rsync over SSH, you'll need to be set up for password-less (key-based) 

402 access. 

403 - If the destination folder does not already exist, this method will not 

404 create it or its parent directories. 

405 """ 

406 if not local_path.exists(): 

407 raise FileNotFoundError(f"{local_path} does not exist") 

408 

409 if remote_uri.netloc == get_default_netloc(): 

410 SYNC_LOGGER.debug("Performing sync as a local transfer") 

411 remote_path: str = unquote(remote_uri.path) 

412 elif use_daemon: 

413 remote_path = remote_uri.geturl() 

414 else: 

415 remote_path = uri_to_ssh(remote_uri) 

416 

417 if rsync_args: # pragma: no cover 

418 raise NotImplementedError 

419 

420 run_rsync( 

421 local_path.parent, 

422 remote_path, 

423 local_path.name, 

424 delete, 

425 dry_run, 

426 exclude, 

427 *(rsync_args or ()), 

428 timeout=timeout, 

429 verbosity=verbosity, 

430 ) 

431 

432 

def push(
    local_path: Path,
    remote_uri: ParseResult,
    exclude: Iterable[str],
    dry_run: bool,
    use_daemon: bool = False,
    timeout: int | None = None,
    delete: bool = True,
    verbosity: int = 0,
    rsync_args: Iterable[str] | None = None,
) -> None:
    """Use rsync to send a local file or folder up to the specified location,
    overwriting any files and folders already at the destination.

    Parameters
    ----------
    local_path : Path
        The file or folder to copy
    remote_uri : ParseResult
        The URI for the remote location to copy into
    exclude : list of str
        Any patterns that should be excluded from the sync
    dry_run : bool
        Whether to only simulate this sync (report the operations to be
        performed but not actually perform them)
    use_daemon : bool, optional
        By default, the rsync is performed over ssh. If you happen to have an
        rsync daemon running on your system, however, you're welcome to
        leverage it instead by passing in `use_daemon=True`
    timeout : int, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.
    delete : bool, optional
        Whether part of the syncing should include deleting files at the
        destination that aren't at the source. Default is True.
    verbosity : int
        A modifier for how much info to output either to stdout or the
        INFO-level logs. Defaults to 0.
    rsync_args: list of str, optional
        Any additional arguments to pass into rsync. Note that rsync is run by
        default with the flags: `-shaz`

    Raises
    ------
    NotImplementedError
        If any `rsync_args` are actually provided

    Notes
    -----
    - This method does not provide for interactive authentication. If using
      rsync over SSH, you'll need to be set up for password-less (key-based)
      access.
    - If the destination folder does not already exist, this method will very
      likely fail.
    """
    # Work out how rsync should address the destination.
    target_location: str
    if remote_uri.netloc == get_default_netloc():
        SYNC_LOGGER.debug("Performing sync as a local transfer")
        target_location = unquote(remote_uri.path)
    else:
        target_location = (
            remote_uri.geturl() if use_daemon else uri_to_ssh(remote_uri)
        )

    if rsync_args:  # pragma: no cover
        raise NotImplementedError

    # rsync is run from the parent directory and names the local source by
    # its bare name
    extra_args = tuple(rsync_args or ())
    run_rsync(
        local_path.parent,
        local_path.name,
        target_location,
        delete,
        dry_run,
        exclude,
        *extra_args,
        timeout=timeout,
        verbosity=verbosity,
    )