Coverage for enderchest/sync/sftp.py: 90%

181 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-03-28 20:32 +0000

1"""paramiko-based sftp sync implementation""" 

2 

3import os 

4import posixpath 

5import stat 

6from collections.abc import Collection, Generator 

7from contextlib import contextmanager 

8from pathlib import Path 

9from typing import Any 

10from urllib.parse import ParseResult, unquote 

11from urllib.request import url2pathname 

12 

13import paramiko 

14 

15from ..prompt import prompt 

16from . import ( 

17 SYNC_LOGGER, 

18 Op, 

19 diff, 

20 file, 

21 filter_contents, 

22 generate_sync_report, 

23 is_identical, 

24) 

25 

26 

@contextmanager
def connect(
    uri: ParseResult, timeout: float | None = None
) -> Generator[paramiko.sftp_client.SFTPClient, None, None]:
    """Yield an SFTPClient connected to the server specified by the given URI

    Parameters
    ----------
    uri : ParseResult
        The URI of the EnderChest to connect to. The hostname defaults to
        "localhost" and the port to 22 when the URI does not specify them.
    timeout : float, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.

    Yields
    ------
    SFTPClient
        A Paramiko SFTP client connected to the specified server

    Raises
    ------
    ValueError
        If the URI is invalid or the credentials are incorrect

    Notes
    -----
    - Key-based (passwordless) login is attempted first. Only if that fails
      with an authentication error is the user prompted for a password.
    - Only authentication errors are handled here; any other error raised
      while connecting propagates to the caller unchanged.
    - Both the SFTP session and the underlying SSH connection are closed when
      the context exits, whether or not the body raised.
    """
    ssh_client = paramiko.client.SSHClient()
    # NOTE(review): AutoAddPolicy accepts host keys that are not yet known
    # instead of rejecting them -- confirm this is acceptable for your threat
    # model.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    try:
        hostname = uri.hostname or "localhost"
        connect_kwargs: dict[str, Any] = {
            "port": uri.port or 22,
            "username": uri.username,
        }
        if timeout is not None:
            connect_kwargs["timeout"] = timeout

        try:
            # note: passing in password is explicitly unsupported
            ssh_client.connect(hostname, **connect_kwargs)
        except paramiko.AuthenticationException:
            target = ((uri.username + "@") if uri.username else "") + hostname

            SYNC_LOGGER.warning(
                f"This machine is not set up for passwordless login to {target}"
                "\nFor instructions on setting up public key-based authentication,"
                " which is both"
                "\nmore convenient and more secure, see:"
                "\nhttps://openbagtwo.github.io/EnderChest"
                "/dev/suggestions/#passwordless-ssh-authentication"
            )
            password = prompt(
                f"Please enter the password for {target}", is_password=True
            )
            try:
                ssh_client.connect(hostname, password=password, **connect_kwargs)
            except paramiko.AuthenticationException as bad_login:
                raise ValueError(
                    "Authentication failed."
                    " Did you supply the correct username and password?"
                ) from bad_login

        sftp_client = ssh_client.open_sftp()
        try:
            yield sftp_client
        finally:
            # close the SFTP session even if the caller's block raised
            sftp_client.close()
    finally:
        # always release the SSH connection, including when connecting failed
        ssh_client.close()

102 

103 

def download_file(
    client: paramiko.sftp_client.SFTPClient,
    remote_loc: str,
    local_path: Path,
    remote_stat: paramiko.SFTPAttributes,
) -> None:
    """Fetch a single remote object over SFTP and store it locally.

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    remote_loc : str
        The POSIX path of the file to download
    local_path : Path
        The path to locally save the file
    remote_stat : stat-like
        The `os.stat_result`-like properties of the remote object

    Notes
    -----
    - A remote symlink is recreated locally as a symlink pointing at the same
      target string; its contents are not transferred.
    - A regular file is fetched with `client.get()`, after which the local
      access and modification times are set from `remote_stat`, provided both
      values are present and non-zero.
    - No validation is performed: neither path is checked for existence, for
      being a file, or for living in an existing folder.
    """
    mode = remote_stat.st_mode or 0
    if stat.S_ISLNK(mode):
        link_target = client.readlink(remote_loc) or ""
        local_path.symlink_to(Path(link_target))
        return

    client.get(remote_loc, local_path)

    accessed = remote_stat.st_atime
    modified = remote_stat.st_mtime
    if not (accessed and modified):
        # the server did not report usable timestamps, so leave them alone
        return
    os.utime(local_path, times=(accessed, modified))

139 

140 

def upload_file(
    client: paramiko.sftp_client.SFTPClient,
    local_path: Path,
    remote_loc: str,
) -> None:
    """Upload a local file to a remote SFTP server

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    local_path : Path
        The path of the file to upload
    remote_loc : str
        The POSIX path for the remote location to save the file

    Notes
    -----
    - A local symlink is recreated on the remote as a symlink pointing at the
      same target (expressed as a POSIX path); its contents are not uploaded.
    - A regular file is sent with `client.put()`, after which the remote
      access and modification times are set to match the local file's.
    - It does not check if either path is valid, points to a file, lives in an
      existing folder, etc.
    """
    if local_path.is_symlink():
        client.symlink(local_path.readlink().as_posix(), remote_loc)
        return

    client.put(local_path, remote_loc)
    # stat once so that both timestamps come from the same snapshot
    local_stat = local_path.stat()
    client.utime(remote_loc, times=(local_stat.st_atime, local_stat.st_mtime))

170 

171 

def rglob(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """List everything beneath a remote directory, descending into subfolders

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        The attributes of all files, folders and symlinks found under the
        specified path

    Notes
    -----
    - The paths returned are *absolute*
    - The search is performed depth-first: each directory entry is
      immediately followed by everything found inside it
    - The `filename` of every returned attributes object is overwritten with
      the entry's full remote path
    """
    SYNC_LOGGER.debug(f"ls {path}")
    listing = client.listdir_attr(path)

    found: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for entry in listing:
        full_remote_path = posixpath.join(path, entry.filename)
        entry.filename = full_remote_path
        found.append((Path(url2pathname(full_remote_path)), entry))
        if not stat.S_ISDIR(entry.st_mode or 0):
            continue
        # descend right away so a folder's children directly follow it
        found += rglob(client, full_remote_path)
    return found

204 

205 

def get_contents(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """Enumerate a remote directory tree, reporting paths relative to its root

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        The attributes of all files, folders and symlinks found under the
        specified path

    Notes
    -----
    - This list is generated via a depth-first search so that all parent
      directories appear before their children
    - The paths returned are relative to the provided path
    """
    absolute_entries = rglob(client, path)
    relative_entries: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for absolute_path, attributes in absolute_entries:
        relative_entries.append(
            (absolute_path.relative_to(url2pathname(path)), attributes)
        )
    return relative_entries

234 

235 

236def pull( 

237 remote_uri: ParseResult, 

238 local_path: Path, 

239 exclude: Collection[str], 

240 dry_run: bool, 

241 timeout: float | None = None, 

242 delete: bool = True, 

243 **unsupported_kwargs, 

244) -> None: 

245 """Sync an upstream file or folder into the specified location SFTP. 

246 This will overwrite any files and folders already at the destination. 

247 

248 Parameters 

249 ---------- 

250 remote_uri : ParseResult 

251 The URI for the remote resource to copy from 

252 local_path : Path 

253 The destination folder 

254 exclude : list of str 

255 Any patterns that should be excluded from the sync 

256 dry_run : bool 

257 Whether to only simulate this sync (report the operations to be performed 

258 but not actually perform them) 

259 timeout : float, optional 

260 The number of seconds to wait before timing out the sync operation. 

261 If None is provided, no explicit timeout value will be set. 

262 delete : bool 

263 Whether part of the syncing should include deleting files at the destination 

264 that aren't at the source. Default is True. 

265 **unsupported_kwargs 

266 Any other provided options will be ignored 

267 

268 Raises 

269 ------ 

270 FileNotFoundError 

271 If the destination folder does not exist, or if the remote path 

272 does not exist 

273 OSError 

274 If the remote path cannot be accessed for any other reason (permissions, 

275 most likely) 

276 

277 Notes 

278 ----- 

279 - If the destination folder does not already exist, this method will not 

280 create it or its parent directories. 

281 """ 

282 if not local_path.exists(): 

283 raise FileNotFoundError(f"{local_path} does not exist") 

284 if unsupported_kwargs: 

285 SYNC_LOGGER.debug( 

286 "The following command-line options are ignored for this protocol:\n%s", 

287 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()), 

288 ) 

289 

290 remote_loc = posixpath.normpath(unquote(remote_uri.path)) 

291 destination_path = local_path / posixpath.basename(remote_loc) 

292 

293 if destination_path.is_symlink() and not destination_path.is_dir(): 

294 SYNC_LOGGER.warning("Removing symlink %s", destination_path) 

295 if not dry_run: 

296 destination_path.unlink() 

297 else: 

298 SYNC_LOGGER.debug( 

299 "And replacing it entirely with the remote's %s", remote_loc 

300 ) 

301 return 

302 elif destination_path.exists() and not destination_path.is_dir(): 

303 SYNC_LOGGER.warning("Deleting file %s", destination_path) 

304 if not dry_run: 

305 destination_path.unlink() 

306 else: 

307 SYNC_LOGGER.debug( 

308 "And replacing it entirely with the remote's %s", remote_loc 

309 ) 

310 return 

311 

312 with connect(uri=remote_uri, timeout=timeout) as remote: 

313 try: 

314 source_target = remote.lstat(remote_loc) 

315 except OSError as bad_target: 

316 raise type(bad_target)( 

317 f"Could not access {remote_loc} on remote: {bad_target}" 

318 ) 

319 if not stat.S_ISDIR(source_target.st_mode or 0): 

320 if destination_path.exists() and is_identical( 

321 source_target, destination_path.stat() 

322 ): 

323 SYNC_LOGGER.warning( 

324 "Remote file matches %s. No transfer needed.", 

325 destination_path, 

326 ) 

327 return 

328 SYNC_LOGGER.debug( 

329 "Downloading file %s from remote", 

330 destination_path, 

331 ) 

332 if not dry_run: 

333 download_file( 

334 remote, 

335 remote_loc, 

336 destination_path, 

337 source_target, 

338 ) 

339 return 

340 

341 if not destination_path.exists(): 

342 SYNC_LOGGER.debug( 

343 "Downloading the entire contents of the remote's %s", remote_loc 

344 ) 

345 if dry_run: 

346 return 

347 destination_path.mkdir() 

348 

349 source_contents = filter_contents( 

350 get_contents(remote, remote_loc), 

351 exclude, 

352 prefix=remote_loc, 

353 ) 

354 destination_contents = filter_contents( 

355 file.get_contents(destination_path), 

356 exclude, 

357 prefix=destination_path, 

358 ) 

359 

360 sync_diff = diff(source_contents, destination_contents) 

361 

362 if dry_run: 

363 generate_sync_report(sync_diff) 

364 return 

365 

366 ignore = file.ignore_patterns(*exclude) 

367 for path, path_stat, operation in sync_diff: 

368 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)): 

369 case (Op.CREATE, True): 

370 SYNC_LOGGER.debug("Creating directory %s", destination_path / path) 

371 (destination_path / path).mkdir(parents=True, exist_ok=True) 

372 case (Op.CREATE, False) | (Op.REPLACE, False): 

373 SYNC_LOGGER.debug( 

374 "Downloading file %s from remote", 

375 destination_path / path, 

376 ) 

377 (destination_path / path).unlink(missing_ok=True) 

378 download_file( 

379 remote, 

380 posixpath.join(remote_loc, path.as_posix()), 

381 destination_path / path, 

382 path_stat, # type: ignore[arg-type] 

383 ) 

384 case (Op.DELETE, True): 

385 # recall that for deletions, it's the *destination's* stats 

386 if delete: 

387 file.clean(destination_path / path, ignore, dry_run) 

388 case (Op.DELETE, False): 

389 SYNC_LOGGER.debug("Deleting file %s", destination_path / path) 

390 if delete: 

391 (destination_path / path).unlink() 

392 case op, is_dir: # pragma: no cover 

393 raise NotImplementedError( 

394 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}" 

395 ) 

396 

397 

398def push( 

399 local_path: Path, 

400 remote_uri: ParseResult, 

401 exclude: Collection[str], 

402 dry_run: bool, 

403 timeout: float | None = None, 

404 delete: bool = True, 

405 **unsupported_kwargs, 

406) -> None: 

407 """Sync a local file or folder into the specified location using SFTP. 

408 This will overwrite any files and folders already at the destination. 

409 

410 Parameters 

411 ---------- 

412 local_path : Path 

413 The file or folder to copy 

414 remote_uri : ParseResult 

415 The URI for the remote location to copy into 

416 exclude : list of str 

417 Any patterns that should be excluded from the sync 

418 dry_run : bool 

419 Whether to only simulate this sync (report the operations to be performed 

420 but not actually perform them) 

421 timeout : float, optional 

422 The number of seconds to wait before timing out the sync operation. 

423 If None is provided, no explicit timeout value will be set. 

424 delete : bool, optional 

425 Whether part of the syncing should include deleting files at the destination 

426 that aren't at the source. Default is True. 

427 **unsupported_kwargs 

428 Any other provided options will be ignored 

429 

430 Notes 

431 ----- 

432 - If the destination folder does not already exist, this method will not 

433 create it or its parent directories. 

434 """ 

435 if not local_path.exists(): 

436 raise FileNotFoundError(f"{local_path} does not exist.") 

437 if unsupported_kwargs: 

438 SYNC_LOGGER.debug( 

439 "The following command-line options are ignored for this protocol:\n%s", 

440 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()), 

441 ) 

442 

443 remote_parent = posixpath.normpath(unquote(remote_uri.path)) 

444 

445 with connect(uri=remote_uri, timeout=timeout) as remote: 

446 try: 

447 remote_folder_stat = remote.lstat(remote_parent) 

448 except OSError as bad_target: 

449 raise type(bad_target)( 

450 f"Could not access {remote_parent} on remote: {bad_target}" 

451 ) 

452 if not stat.S_ISDIR(remote_folder_stat.st_mode or 0): 

453 raise NotADirectoryError(f"{remote_parent} on remote is not a directory.") 

454 

455 remote_loc = posixpath.join(remote_parent, local_path.name) 

456 try: 

457 target_stat = remote.lstat(remote_loc) 

458 except FileNotFoundError: 

459 target_stat = None 

460 if not stat.S_ISDIR(local_path.stat().st_mode or 0): 

461 if target_stat and is_identical(local_path.stat(), target_stat): 

462 SYNC_LOGGER.warning("Remote file matches %s", local_path) 

463 return 

464 

465 SYNC_LOGGER.debug( 

466 "Uploading file %s to remote", 

467 local_path, 

468 ) 

469 if not dry_run: 

470 upload_file(remote, local_path, remote_loc) 

471 return 

472 if not target_stat: 

473 SYNC_LOGGER.debug("Uploading the entire contents %s", local_path) 

474 if dry_run: 

475 return 

476 remote.mkdir(remote_loc) 

477 elif not stat.S_ISDIR(target_stat.st_mode or 0): 

478 SYNC_LOGGER.warning( 

479 "Deleting remote file or symlink %s", 

480 remote_loc, 

481 ) 

482 if dry_run: 

483 SYNC_LOGGER.debug("And replacing it entirely with %s", local_path) 

484 return 

485 remote.remove(remote_loc) 

486 remote.mkdir(remote_loc) 

487 

488 source_contents = filter_contents( 

489 file.get_contents(local_path), exclude, prefix=local_path 

490 ) 

491 destination_contents = filter_contents( 

492 get_contents(remote, remote_loc), 

493 exclude, 

494 prefix=remote_loc, 

495 ) 

496 

497 sync_diff = diff(source_contents, destination_contents) 

498 

499 if dry_run: 

500 generate_sync_report(sync_diff) 

501 return 

502 

503 for path, path_stat, operation in sync_diff: 

504 posix_path = posixpath.join(remote_loc, path.as_posix()) 

505 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)): 

506 case (Op.CREATE, True): 

507 SYNC_LOGGER.debug("Creating remote directory %s", posix_path) 

508 remote.mkdir(posix_path) 

509 case (Op.CREATE, False) | (Op.REPLACE, False): 

510 SYNC_LOGGER.debug( 

511 "Uploading file %s to remote", 

512 local_path / path, 

513 ) 

514 try: 

515 remote.remove(posix_path) 

516 except FileNotFoundError: 

517 pass 

518 upload_file( 

519 remote, 

520 local_path / path, 

521 posix_path, 

522 ) 

523 case (Op.DELETE, True): 

524 # recall that for deletions, it's the *destination's* stats 

525 if delete: 

526 remote.rmdir(posix_path) 

527 case (Op.DELETE, False): 

528 if delete: 

529 SYNC_LOGGER.debug("Deleting remote file %s", posix_path) 

530 remote.remove(posix_path) 

531 case op, is_dir: # pragma: no cover 

532 raise NotImplementedError( 

533 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}" 

534 )