Coverage for enderchest/sync/sftp.py: 90%

181 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-07-30 12:06 +0000

1"""paramiko-based sftp sync implementation""" 

2 

3import os 

4import posixpath 

5import stat 

6from collections.abc import Collection, Generator 

7from contextlib import contextmanager 

8from pathlib import Path 

9from typing import Any 

10from urllib.parse import ParseResult, unquote 

11from urllib.request import url2pathname 

12 

13import paramiko 

14 

15from ..prompt import prompt 

16from . import ( 

17 SYNC_LOGGER, 

18 Op, 

19 diff, 

20 file, 

21 filter_contents, 

22 generate_sync_report, 

23 is_identical, 

24) 

25 

26 

@contextmanager
def connect(
    uri: ParseResult, timeout: float | None = None
) -> Generator[paramiko.sftp_client.SFTPClient, None, None]:
    """Yield an SFTPClient connected to the server specified by the given URI

    Parameters
    ----------
    uri : ParseResult
        The URI of the EnderChest to connect to. A missing hostname falls
        back to "localhost" and a missing port falls back to 22.
    timeout : float, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.

    Yields
    ------
    SFTPClient
        A Paramiko SFTP client connected to the specified server

    Raises
    ------
    ValueError
        If the passwordless login is rejected and the password that was
        then prompted for is rejected as well

    Notes
    -----
    - Any other exception raised by Paramiko while connecting is propagated
      unchanged.
    - Both the SFTP session and the underlying SSH client are closed when the
      context exits, whether or not the body (or the connection attempt
      itself) raised.
    """
    ssh_client = paramiko.client.SSHClient()
    # NOTE(review): AutoAddPolicy accepts host keys that have not been seen
    # before without verification -- confirm this is an acceptable trade-off.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    hostname = uri.hostname or "localhost"
    connect_kwargs: dict[str, Any] = {
        "port": uri.port or 22,
        "username": uri.username,
    }
    if timeout is not None:
        connect_kwargs["timeout"] = timeout

    # The outer try guarantees the SSH client is released even when the
    # connection attempt or the SFTP session setup fails.
    try:
        try:
            # note: passing in password is explicitly unsupported
            ssh_client.connect(hostname, **connect_kwargs)
        except paramiko.AuthenticationException:
            target = ((uri.username + "@") if uri.username else "") + hostname

            SYNC_LOGGER.warning(
                "This machine is not set up for passwordless login to %s"
                "\nFor instructions on setting up public key-based authentication,"
                " which is both"
                "\nmore convenient and more secure, see:"
                "\nhttps://openbagtwo.github.io/EnderChest"
                "/dev/suggestions/#passwordless-ssh-authentication",
                target,
            )
            password = prompt(
                f"Please enter the password for {target}", is_password=True
            )
            try:
                ssh_client.connect(hostname, password=password, **connect_kwargs)
            except paramiko.AuthenticationException as bad_login:
                raise ValueError(
                    "Authentication failed."
                    " Did you supply the correct username and password?"
                ) from bad_login

        sftp_client = ssh_client.open_sftp()
        try:
            yield sftp_client
        finally:
            # close the SFTP session even if the caller's block raised
            sftp_client.close()
    finally:
        ssh_client.close()

103 

104 

def download_file(
    client: paramiko.sftp_client.SFTPClient,
    remote_loc: str,
    local_path: Path,
    remote_stat: paramiko.SFTPAttributes,
) -> None:
    """Download a file from a remote SFTP server and save it at the specified
    location.

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    remote_loc : str
        The POSIX path of the file to download
    local_path : Path
        The path to locally save the file
    remote_stat : stat-like
        The `os.stat_result`-like properties of the remote object

    Notes
    -----
    This is a wrapper around `client.get()` that can handle symlinks and
    updating timestamps. It does not check if either path is valid, points
    to a file, lives in an existing folder, etc.

    - A remote symlink is recreated locally with the target reported by
      `client.readlink()`; nothing is downloaded and no timestamps are set.
    - For any other object, the local access and modification times are set
      from `remote_stat` when both values are present.
    """
    if stat.S_ISLNK(remote_stat.st_mode or 0):
        local_path.symlink_to(Path(client.readlink(remote_loc) or ""))
        return

    client.get(remote_loc, local_path)

    # Compare against None rather than relying on truthiness so that a
    # timestamp of 0 (the epoch) is still applied.
    if remote_stat.st_atime is not None and remote_stat.st_mtime is not None:
        os.utime(
            local_path,
            times=(remote_stat.st_atime, remote_stat.st_mtime),
        )

140 

141 

def upload_file(
    client: paramiko.sftp_client.SFTPClient,
    local_path: Path,
    remote_loc: str,
) -> None:
    """Upload a local file to a remote SFTP server

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    local_path : Path
        The path of the file to upload
    remote_loc : str
        The POSIX path for the remote location to save the file

    Notes
    -----
    This is just a wrapper around `client.put()` that can handle symlinks.
    It does not check if either path is valid, points to a file, lives in an
    existing folder, etc.

    - A local symlink is recreated on the remote with the same target
      (expressed as a POSIX path); nothing is uploaded.
    - For any other file, the remote access and modification times are set
      to match the local file after the upload.
    """
    if local_path.is_symlink():
        client.symlink(local_path.readlink().as_posix(), remote_loc)
        return

    client.put(local_path, remote_loc)

    # stat once so both timestamps come from the same snapshot
    local_stat = local_path.stat()
    client.utime(remote_loc, times=(local_stat.st_atime, local_stat.st_mtime))

171 

172 

def rglob(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """Walk a remote directory tree and list everything beneath it

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        One entry for every file, folder and symlink located under the
        specified path, paired with its attributes

    Notes
    -----
    - The paths returned are *absolute*
    - The search is performed depth-first: a directory's entry is
      immediately followed by the entries found inside it
    - The `filename` of each returned attributes object is overwritten with
      the entry's full remote path
    """
    SYNC_LOGGER.debug("ls %s", path)
    listing = client.listdir_attr(path)

    found: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for entry in listing:
        full_loc = posixpath.join(path, entry.filename)
        entry.filename = full_loc
        found.append((Path(url2pathname(full_loc)), entry))

        if not stat.S_ISDIR(entry.st_mode or 0):
            continue
        # descend into the sub-directory before moving on to its siblings
        found.extend(rglob(client, full_loc))
    return found

205 

206 

def get_contents(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """List everything under a remote directory, keyed by relative path

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        The attributes of all files, folders and symlinks found under the
        specified path

    Notes
    -----
    - The entries come from `rglob()` and are kept in the order it returns
      them
    - The paths returned are relative to the provided path
    """
    relative_contents: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for absolute_path, attributes in rglob(client, path):
        relative_path = absolute_path.relative_to(url2pathname(path))
        relative_contents.append((relative_path, attributes))
    return relative_contents

235 

236 

237def pull( 

238 remote_uri: ParseResult, 

239 local_path: Path, 

240 exclude: Collection[str], 

241 dry_run: bool, 

242 timeout: float | None = None, 

243 delete: bool = True, 

244 **unsupported_kwargs, 

245) -> None: 

246 """Sync an upstream file or folder into the specified location SFTP. 

247 This will overwrite any files and folders already at the destination. 

248 

249 Parameters 

250 ---------- 

251 remote_uri : ParseResult 

252 The URI for the remote resource to copy from 

253 local_path : Path 

254 The destination folder 

255 exclude : list of str 

256 Any patterns that should be excluded from the sync 

257 dry_run : bool 

258 Whether to only simulate this sync (report the operations to be performed 

259 but not actually perform them) 

260 timeout : float, optional 

261 The number of seconds to wait before timing out the sync operation. 

262 If None is provided, no explicit timeout value will be set. 

263 delete : bool 

264 Whether part of the syncing should include deleting files at the destination 

265 that aren't at the source. Default is True. 

266 **unsupported_kwargs 

267 Any other provided options will be ignored 

268 

269 Raises 

270 ------ 

271 FileNotFoundError 

272 If the destination folder does not exist, or if the remote path 

273 does not exist 

274 OSError 

275 If the remote path cannot be accessed for any other reason (permissions, 

276 most likely) 

277 

278 Notes 

279 ----- 

280 - If the destination folder does not already exist, this method will not 

281 create it or its parent directories. 

282 """ 

283 if not local_path.exists(): 

284 raise FileNotFoundError(f"{local_path} does not exist") 

285 if unsupported_kwargs: 

286 SYNC_LOGGER.debug( 

287 "The following command-line options are ignored for this protocol:\n%s", 

288 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()), 

289 ) 

290 

291 remote_loc = posixpath.normpath(unquote(remote_uri.path)) 

292 destination_path = local_path / posixpath.basename(remote_loc) 

293 

294 if destination_path.is_symlink() and not destination_path.is_dir(): 

295 SYNC_LOGGER.warning("Removing symlink %s", destination_path) 

296 if not dry_run: 

297 destination_path.unlink() 

298 else: 

299 SYNC_LOGGER.debug( 

300 "And replacing it entirely with the remote's %s", remote_loc 

301 ) 

302 return 

303 elif destination_path.exists() and not destination_path.is_dir(): 

304 SYNC_LOGGER.warning("Deleting file %s", destination_path) 

305 if not dry_run: 

306 destination_path.unlink() 

307 else: 

308 SYNC_LOGGER.debug( 

309 "And replacing it entirely with the remote's %s", remote_loc 

310 ) 

311 return 

312 

313 with connect(uri=remote_uri, timeout=timeout) as remote: 

314 try: 

315 source_target = remote.lstat(remote_loc) 

316 except OSError as bad_target: 

317 raise type(bad_target)( 

318 f"Could not access {remote_loc} on remote: {bad_target}" 

319 ) 

320 if not stat.S_ISDIR(source_target.st_mode or 0): 

321 if destination_path.exists() and is_identical( 

322 source_target, destination_path.stat() 

323 ): 

324 SYNC_LOGGER.warning( 

325 "Remote file matches %s. No transfer needed.", 

326 destination_path, 

327 ) 

328 return 

329 SYNC_LOGGER.debug( 

330 "Downloading file %s from remote", 

331 destination_path, 

332 ) 

333 if not dry_run: 

334 download_file( 

335 remote, 

336 remote_loc, 

337 destination_path, 

338 source_target, 

339 ) 

340 return 

341 

342 if not destination_path.exists(): 

343 SYNC_LOGGER.debug( 

344 "Downloading the entire contents of the remote's %s", remote_loc 

345 ) 

346 if dry_run: 

347 return 

348 destination_path.mkdir() 

349 

350 source_contents = filter_contents( 

351 get_contents(remote, remote_loc), 

352 exclude, 

353 prefix=remote_loc, 

354 ) 

355 destination_contents = filter_contents( 

356 file.get_contents(destination_path), 

357 exclude, 

358 prefix=destination_path, 

359 ) 

360 

361 sync_diff = diff(source_contents, destination_contents) 

362 

363 if dry_run: 

364 generate_sync_report(sync_diff) 

365 return 

366 

367 ignore = file.ignore_patterns(*exclude) 

368 for path, path_stat, operation in sync_diff: 

369 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)): 

370 case (Op.CREATE, True): 

371 SYNC_LOGGER.debug("Creating directory %s", destination_path / path) 

372 (destination_path / path).mkdir(parents=True, exist_ok=True) 

373 case (Op.CREATE, False) | (Op.REPLACE, False): 

374 SYNC_LOGGER.debug( 

375 "Downloading file %s from remote", 

376 destination_path / path, 

377 ) 

378 (destination_path / path).unlink(missing_ok=True) 

379 download_file( 

380 remote, 

381 posixpath.join(remote_loc, path.as_posix()), 

382 destination_path / path, 

383 path_stat, # type: ignore[arg-type] 

384 ) 

385 case (Op.DELETE, True): 

386 # recall that for deletions, it's the *destination's* stats 

387 if delete: 

388 file.clean(destination_path / path, ignore, dry_run) 

389 case (Op.DELETE, False): 

390 SYNC_LOGGER.debug("Deleting file %s", destination_path / path) 

391 if delete: 

392 (destination_path / path).unlink() 

393 case op, is_dir: # pragma: no cover 

394 raise NotImplementedError( 

395 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}" 

396 ) 

397 

398 

399def push( 

400 local_path: Path, 

401 remote_uri: ParseResult, 

402 exclude: Collection[str], 

403 dry_run: bool, 

404 timeout: float | None = None, 

405 delete: bool = True, 

406 **unsupported_kwargs, 

407) -> None: 

408 """Sync a local file or folder into the specified location using SFTP. 

409 This will overwrite any files and folders already at the destination. 

410 

411 Parameters 

412 ---------- 

413 local_path : Path 

414 The file or folder to copy 

415 remote_uri : ParseResult 

416 The URI for the remote location to copy into 

417 exclude : list of str 

418 Any patterns that should be excluded from the sync 

419 dry_run : bool 

420 Whether to only simulate this sync (report the operations to be performed 

421 but not actually perform them) 

422 timeout : float, optional 

423 The number of seconds to wait before timing out the sync operation. 

424 If None is provided, no explicit timeout value will be set. 

425 delete : bool, optional 

426 Whether part of the syncing should include deleting files at the destination 

427 that aren't at the source. Default is True. 

428 **unsupported_kwargs 

429 Any other provided options will be ignored 

430 

431 Notes 

432 ----- 

433 - If the destination folder does not already exist, this method will not 

434 create it or its parent directories. 

435 """ 

436 if not local_path.exists(): 

437 raise FileNotFoundError(f"{local_path} does not exist.") 

438 if unsupported_kwargs: 

439 SYNC_LOGGER.debug( 

440 "The following command-line options are ignored for this protocol:\n%s", 

441 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()), 

442 ) 

443 

444 remote_parent = posixpath.normpath(unquote(remote_uri.path)) 

445 

446 with connect(uri=remote_uri, timeout=timeout) as remote: 

447 try: 

448 remote_folder_stat = remote.lstat(remote_parent) 

449 except OSError as bad_target: 

450 raise type(bad_target)( 

451 f"Could not access {remote_parent} on remote: {bad_target}" 

452 ) 

453 if not stat.S_ISDIR(remote_folder_stat.st_mode or 0): 

454 raise NotADirectoryError(f"{remote_parent} on remote is not a directory.") 

455 

456 remote_loc = posixpath.join(remote_parent, local_path.name) 

457 try: 

458 target_stat = remote.lstat(remote_loc) 

459 except FileNotFoundError: 

460 target_stat = None 

461 if not stat.S_ISDIR(local_path.stat().st_mode or 0): 

462 if target_stat and is_identical(local_path.stat(), target_stat): 

463 SYNC_LOGGER.warning("Remote file matches %s", local_path) 

464 return 

465 

466 SYNC_LOGGER.debug( 

467 "Uploading file %s to remote", 

468 local_path, 

469 ) 

470 if not dry_run: 

471 upload_file(remote, local_path, remote_loc) 

472 return 

473 if not target_stat: 

474 SYNC_LOGGER.debug("Uploading the entire contents %s", local_path) 

475 if dry_run: 

476 return 

477 remote.mkdir(remote_loc) 

478 elif not stat.S_ISDIR(target_stat.st_mode or 0): 

479 SYNC_LOGGER.warning( 

480 "Deleting remote file or symlink %s", 

481 remote_loc, 

482 ) 

483 if dry_run: 

484 SYNC_LOGGER.debug("And replacing it entirely with %s", local_path) 

485 return 

486 remote.remove(remote_loc) 

487 remote.mkdir(remote_loc) 

488 

489 source_contents = filter_contents( 

490 file.get_contents(local_path), exclude, prefix=local_path 

491 ) 

492 destination_contents = filter_contents( 

493 get_contents(remote, remote_loc), 

494 exclude, 

495 prefix=remote_loc, 

496 ) 

497 

498 sync_diff = diff(source_contents, destination_contents) 

499 

500 if dry_run: 

501 generate_sync_report(sync_diff) 

502 return 

503 

504 for path, path_stat, operation in sync_diff: 

505 posix_path = posixpath.join(remote_loc, path.as_posix()) 

506 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)): 

507 case (Op.CREATE, True): 

508 SYNC_LOGGER.debug("Creating remote directory %s", posix_path) 

509 remote.mkdir(posix_path) 

510 case (Op.CREATE, False) | (Op.REPLACE, False): 

511 SYNC_LOGGER.debug( 

512 "Uploading file %s to remote", 

513 local_path / path, 

514 ) 

515 try: 

516 remote.remove(posix_path) 

517 except FileNotFoundError: 

518 pass 

519 upload_file( 

520 remote, 

521 local_path / path, 

522 posix_path, 

523 ) 

524 case (Op.DELETE, True): 

525 # recall that for deletions, it's the *destination's* stats 

526 if delete: 

527 remote.rmdir(posix_path) 

528 case (Op.DELETE, False): 

529 if delete: 

530 SYNC_LOGGER.debug("Deleting remote file %s", posix_path) 

531 remote.remove(posix_path) 

532 case op, is_dir: # pragma: no cover 

533 raise NotImplementedError( 

534 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}" 

535 )