Coverage for enderchest/sync/rsync.py: 87%

138 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-06 16:00 +0000

1"""rsync sync implementation. Relies on the user having rsync installed on their system""" 

2import os.path 

3import re 

4import shutil 

5import subprocess 

6from collections import defaultdict 

7from pathlib import Path 

8from typing import Iterable 

9from urllib.parse import ParseResult, unquote 

10 

11from . import SYNC_LOGGER, get_default_netloc, uri_to_ssh 

12 

# Locate the rsync executable once, at import time. Every operation in this
# module shells out to it, so importing without one is treated as fatal.
RSYNC = shutil.which("rsync")
if not RSYNC:  # pragma: no cover
    raise RuntimeError(
        "No rsync executable found on your system. Cannot sync using this protocol."
    )

18 

19 

def _get_rsync_version() -> tuple[int, int]:
    """Determine the installed version of Rsync

    Returns
    -------
    int
        The major version of the Rsync executable found on the PATH
    int
        The minor version of the Rsync executable found on the PATH

    Raises
    ------
    RuntimeError
        If Rsync is not installed or cannot be launched, if `rsync --version`
        writes anything to stderr or if the version information cannot be
        decoded from the first line of the `rsync --version` output
    """
    try:
        result = subprocess.run(
            ["rsync", "--version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except OSError as launch_failure:
        # covers a missing executable as well as one that can't be executed
        raise RuntimeError(
            "Rsync is not installed or could not be executed."
        ) from launch_failure

    if result.stderr:  # TODO: #124 just use check=True
        raise RuntimeError(result.stderr.decode("utf-8", errors="replace"))

    output_lines = result.stdout.decode("utf-8", errors="replace").splitlines()
    if not output_lines:
        raise RuntimeError("Rsync is not installed or could not be executed.")
    head = output_lines[0]

    # The banner looks like "rsync  version 3.2.7  protocol version 31".
    # The dot is escaped so that it only matches a literal ".", the patch
    # component is not required (only major and minor are used) and a leading
    # "v" is tolerated (NOTE(review): believed to be emitted by some rsync
    # builds -- harmless if it never occurs).
    match = re.match(r"^rsync\s+version\s+v?([0-9]+)\.([0-9]+)", head)
    if match is None:
        raise RuntimeError(f"Could not parse version output:\n{head}")

    major, minor = match.groups()
    return int(major), int(minor)

61 

62 

# Refuse to load this module against an rsync that is older than the minimum
# supported release.
rsync_version = _get_rsync_version()
if rsync_version < (3, 2):
    raise RuntimeError(
        "EnderChest requires Rsync 3.2 or newer. The version detected on your"
        f" system is {rsync_version[0]}.{rsync_version[1]}"
    )

69 

70 

def run_rsync(
    working_directory: Path,
    source: str,
    destination_folder: str,
    delete: bool,
    dry_run: bool,
    exclude: Iterable[str],
    *additional_args: str,
    timeout: int | None = None,
    verbosity: int = 0,
    rsync_flags: str | None = None,
) -> None:
    """Run an operation with rsync

    Parameters
    ----------
    working_directory : Path
        The working directory to run the sync command from
    source : str
        The source file or folder to sync, specified as either a URI string,
        an ssh address or a path relative to the working directory
    destination_folder : str
        The destination folder where the file or folder should be synced to,
        with the same formats available as for source
    delete : bool
        Whether part of the syncing should include deleting files at the
        destination that aren't at the source
    dry_run : bool
        Whether to only simulate this sync (report the operations to be
        performed but not actually perform them)
    exclude : list of str
        Any patterns that should be excluded from the sync
    *additional_args : str
        Any additional arguments to pass into the rsync command
    timeout : int, optional
        The number of seconds to wait before timing out the sync operation.
        If None (or 0) is provided, no explicit timeout value will be set.
    verbosity : int
        A modifier for how much info to output either to stdout or the
        INFO-level logs. At...

        - verbosity = -2 : No information will be printed, even on dry runs
        - verbosity = -1 : The sync itself will be silent. Dry runs will only
                           report the sync statistics.
        - verbosity =  0 : Actual syncs will display a progress bar. Dry run
                           reports will summarize the changes to each shulker
                           box in addition to reporting the sync statistics.
        - verbosity =  1 : Actual syncs will report the progress of each file
                           transfer. Dry runs will report on each file to
                           be created, updated or deleted.
        - verbosity =  2 : Dry runs and syncs will print or log the output
                           of rsync run using the `-vv` modifier

        Verbosity values outside of this range will simply be capped / floored
        to [-2, 2].
    rsync_flags : str, optional
        By default, rsync will be run using the flags "shaz" which means:

          - no space splitting
          - make output (file sizes, mostly) human-readable
          - archive mode (see: https://www.baeldung.com/linux/rsync-archive-mode)
          - compress data during transfer

        Advanced users may choose to override these options, but **you do so
        at your own peril**.

    Raises
    ------
    TimeoutError
        If the rsync operation times out before completion
    FileNotFoundError
        If rsync reports "No such file or directory" on stderr
    RuntimeError
        If rsync writes anything else to stderr

    Notes
    -----
    This method does not perform any validation or normalization of the source,
    destination, exclude-list, additional arguments or rsync options.
    """
    rsync_flags = rsync_flags or "shaz"

    # Enforce the documented range. Without this, the "-v..." flag below would
    # grow without bound for verbosity values above 2.
    verbosity = max(-2, min(2, verbosity))

    args: list[str] = [RSYNC, f"-{rsync_flags}"]  # type: ignore[list-item]
    if delete:
        args.append("--delete")
    if dry_run:
        args.extend(("--dry-run", "--stats"))
        if verbosity < 1:
            # at 1+ we don't need it to be machine-parseable
            args.append("--out-format=%i %n")
    else:
        if verbosity >= 0:
            args.append("--stats")
        if verbosity == 0:
            args.append("--info=progress2")
        if verbosity >= 1:
            args.append("--progress")
    if verbosity > 0:
        args.append("-" + "v" * verbosity)

    for pattern in exclude:
        args.extend(("--exclude", pattern))
    args.extend(additional_args)
    args.extend((source, destination_folder))

    SYNC_LOGGER.debug(
        "Executing the following command:\n  %s",
        " ".join(args),
    )

    with subprocess.Popen(
        args,
        # only dry runs have their stdout captured (for summarizing); real
        # syncs write straight to the parent's stdout
        stdout=subprocess.PIPE if dry_run else None,
        stderr=subprocess.PIPE,
        cwd=working_directory,
    ) as proc:
        try:
            # communicate() drains both pipes while it waits. Waiting first
            # and reading afterwards can deadlock as soon as rsync fills one
            # of the OS pipe buffers.
            raw_output, raw_errors = proc.communicate(timeout=timeout or None)
        except subprocess.TimeoutExpired as times_up:
            proc.kill()
            # collect whatever was produced before the process was killed
            raw_output, raw_errors = proc.communicate()
            if output_log := (raw_output or b"").decode("UTF-8", errors="replace"):
                SYNC_LOGGER.warning(output_log)
            if error_log := (raw_errors or b"").decode("UTF-8", errors="replace"):
                SYNC_LOGGER.error(error_log)
            raise TimeoutError("Timeout reached.") from times_up

    # stdout is None unless it was piped (i.e. unless this was a dry run)
    if output_log := (raw_output or b"").decode("UTF-8", errors="replace"):
        if verbosity > 0:
            dry_run_output = output_log.splitlines()
        else:
            dry_run_output = summarize_rsync_report(output_log)
        SYNC_LOGGER.info("\nSUMMARY\n-------")
        for line in dry_run_output:
            if _is_important_stats_line(line):
                # 25 sits between logging.INFO (20) and logging.WARNING (30)
                SYNC_LOGGER.log(25, line)
            else:
                SYNC_LOGGER.debug(line)

    # any stderr output at all is treated as a failure
    if error_log := (raw_errors or b"").decode("UTF-8", errors="replace"):
        if "No such file or directory" in error_log:
            raise FileNotFoundError(error_log)
        raise RuntimeError(error_log)  # pragma: no cover

216 

217 

def summarize_rsync_report(raw_output: str, depth: int = 2) -> list[str]:
    """Take the captured output from running
    `rsync -ha --out-format="%i %n"`
    and report a high-level summary to the logging.INFO level

    Parameters
    ----------
    raw_output : str
        The raw output captured from running the rsync command
    depth : int, optional
        How many directories to go down from the root to generate the summary.
        Default is 2 (just report on top-level files and folders within the
        source folder).

    Returns
    -------
    list of str
        Any lines that weren't part of the rsync report (and were probably
        part of `--stats`?)

    Notes
    -----
    The rsync man page (https://linux.die.net/man/1/rsync) describes the output
    format as... "cryptic," which I find rather charitable. The relevant bits
    are that `--out-format="%i %n"` produces:
      - `%i` : a string of 11 characters that gives various metadata about the
               file transfer operation (is it a file, a directory or a link?
               Is it being sent or received? Created, updated or deleted?)
      - `%n`: the path of the file (or whatever), unquoted, un-escaped

    Every line that is recognized as part of the report is also echoed to the
    DEBUG log.
    """
    summary: dict[str, dict[str, int] | str] = defaultdict(
        lambda: {"create": 0, "update": 0, "delete": 0}
    )
    stats: list[str] = []
    for line in raw_output.splitlines():
        # splitting once (instead of splitting on every space and re-joining)
        # keeps runs of whitespace inside the path intact
        fields = line.split(maxsplit=1)
        if not fields:  # skip empty (and whitespace-only) lines
            continue

        info = fields[0]
        full_path = os.path.normpath(fields[1] if len(fields) > 1 else "")
        path_key = os.sep.join(full_path.split(os.sep)[:depth])

        operation: str | None
        if info.startswith("*deleting"):
            operation = "delete"
        elif info[2:5] == "+++":  # this is a creation
            operation = "create"
        elif info[:2] in ("<f", ">f", "cL"):
            # a file transfer or (as far as I can tell) a link being replaced;
            # creates were caught above, so this must be an update
            operation = "update"
        elif info[:1] == ".":  # pragma: no cover
            # this just means permissions or dates are being updated or something
            operation = None
        else:  # then hopefully this is part of the stats report
            stats.append(line)
            continue

        if operation is not None:
            if full_path == path_key:
                # the summarized entry itself is being created / updated /
                # deleted, which supersedes any per-file tallies beneath it
                summary[path_key] = operation
            elif not (operation == "create" and info[1] == "d"):
                # (directory creations aren't counted as files)
                entry = summary[path_key]
                if not isinstance(entry, str):
                    entry[operation] += 1
                # otherwise this is described by the top-level op

        SYNC_LOGGER.debug(line)

    for path_key, report in sorted(summary.items()):
        if isinstance(report, str):
            # nice that these verbs follow the same pattern
            SYNC_LOGGER.info(f"{report[:-1].title()}ing {path_key}")
        else:
            # path_key is passed as an argument rather than being baked into
            # the format string so that a "%" in a path can't break formatting
            SYNC_LOGGER.info(
                "Within %s...\n%s",
                path_key,
                "\n".join(
                    f"  - {op[:-1].title()}ing {count} file{'' if count == 1 else 's'}"
                    for op, count in report.items()
                ),
            )
    return stats

323 

324 

def _is_important_stats_line(line: str) -> bool:
    """Decide whether a line of rsync stats output deserves to be logged
    prominently or should only go to the DEBUG log

    Parameters
    ----------
    line : str
        The log line to evaluate

    Returns
    -------
    bool
        True if the line begins with one of the headline statistics,
        False otherwise
    """
    headline_prefixes = (
        "Number of created files:",
        "Number of deleted files:",
        "Number of regular files transferred:",
        "Total transferred file size:",
    )
    return any(line.startswith(prefix) for prefix in headline_prefixes)

347 

348 

349def pull( 

350 remote_uri: ParseResult, 

351 local_path: Path, 

352 exclude: Iterable[str], 

353 dry_run: bool, 

354 use_daemon: bool = False, 

355 timeout: int | None = None, 

356 delete: bool = True, 

357 verbosity: int = 0, 

358 rsync_args: Iterable[str] | None = None, 

359) -> None: 

360 """Sync an upstream file or folder into the specified location using rsync. 

361 This will overwrite any files and folders already at the destination. 

362 

363 Parameters 

364 ---------- 

365 remote_uri : ParseResult 

366 The URI for the remote resource to copy from 

367 local_path : Path 

368 The destination folder 

369 exclude : list of str 

370 Any patterns that should be excluded from the sync 

371 dry_run : bool 

372 Whether to only simulate this sync (report the operations to be performed 

373 but not actually perform them) 

374 use_daemon : bool, optional 

375 By default, the rsync is performed over ssh. If you happen to have an 

376 rsync daemon running on your system, however, you're welcome to leverage 

377 it instead by passing in `use_daemon=True` 

378 timeout : int, optional 

379 The number of seconds to wait before timing out the sync operation. 

380 If None is provided, no explicit timeout value will be set. 

381 delete : bool, optional 

382 Whether part of the syncing should include deleting files at the destination 

383 that aren't at the source. Default is True. 

384 verbosity : int 

385 A modifier for how much info to output either to stdout or the INFO-level 

386 logs. Defaults to 0. 

387 rsync_args: list of str, optional 

388 Any additional arguments to pass into rsync. Note that rsync is run by 

389 default with the flags: `-shaz` 

390 

391 Raises 

392 ------ 

393 FileNotFoundError 

394 If the destination folder does not exist 

395 

396 Notes 

397 ----- 

398 - This method does not provide for interactive authentication. If using 

399 rsync over SSH, you'll need to be set up for password-less (key-based) 

400 access. 

401 - If the destination folder does not already exist, this method will not 

402 create it or its parent directories. 

403 """ 

404 if not local_path.exists(): 

405 raise FileNotFoundError(f"{local_path} does not exist") 

406 

407 if remote_uri.netloc == get_default_netloc(): 

408 SYNC_LOGGER.debug("Performing sync as a local transfer") 

409 remote_path: str = unquote(remote_uri.path) 

410 elif use_daemon: 

411 remote_path = remote_uri.geturl() 

412 else: 

413 remote_path = uri_to_ssh(remote_uri) 

414 

415 if rsync_args: # pragma: no cover 

416 raise NotImplementedError 

417 

418 run_rsync( 

419 local_path.parent, 

420 remote_path, 

421 local_path.name, 

422 delete, 

423 dry_run, 

424 exclude, 

425 *(rsync_args or ()), 

426 timeout=timeout, 

427 verbosity=verbosity, 

428 ) 

429 

430 

def push(
    local_path: Path,
    remote_uri: ParseResult,
    exclude: Iterable[str],
    dry_run: bool,
    use_daemon: bool = False,
    timeout: int | None = None,
    delete: bool = True,
    verbosity: int = 0,
    rsync_args: Iterable[str] | None = None,
) -> None:
    """Use rsync to send a local file or folder up to the specified location,
    overwriting whatever files and folders are already there.

    Parameters
    ----------
    local_path : Path
        The file or folder to copy
    remote_uri : ParseResult
        The URI for the remote location to copy into
    exclude : list of str
        Any patterns that should be excluded from the sync
    dry_run : bool
        Whether to only simulate this sync (report the operations to be
        performed but not actually perform them)
    use_daemon : bool, optional
        By default, the rsync is performed over ssh. Pass `use_daemon=True`
        to hand rsync the full URL instead (for use with an rsync daemon).
    timeout : int, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.
    delete : bool, optional
        Whether part of the syncing should include deleting files at the
        destination that aren't at the source. Default is True.
    verbosity : int
        A modifier for how much info to output either to stdout or the
        INFO-level logs. Defaults to 0.
    rsync_args : list of str, optional
        Any additional arguments to pass into rsync. Not currently supported:
        providing any will raise a NotImplementedError.

    Raises
    ------
    NotImplementedError
        If any `rsync_args` are provided

    Notes
    -----
    - This method does not provide for interactive authentication. If using
      rsync over SSH, you'll need to be set up for password-less (key-based)
      access.
    - If the destination folder does not already exist, this method will very
      likely fail.
    """
    # work out how to address the destination on the rsync command line
    sync_destination: str
    if remote_uri.netloc == get_default_netloc():
        SYNC_LOGGER.debug("Performing sync as a local transfer")
        sync_destination = unquote(remote_uri.path)
    else:
        sync_destination = (
            remote_uri.geturl() if use_daemon else uri_to_ssh(remote_uri)
        )

    if rsync_args:  # pragma: no cover
        raise NotImplementedError

    # the command is run from the source's parent folder, with the source
    # given by its bare name
    run_from, source_name = local_path.parent, local_path.name
    run_rsync(
        run_from,
        source_name,
        sync_destination,
        delete,
        dry_run,
        exclude,
        *(rsync_args or ()),
        timeout=timeout,
        verbosity=verbosity,
    )