Coverage for enderchest/sync/sftp.py: 89%

180 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-04 01:41 +0000

1"""paramiko-based sftp sync implementation""" 

2 

3import os 

4import posixpath 

5import stat 

6from contextlib import contextmanager 

7from pathlib import Path 

8from typing import Any, Collection, Generator 

9from urllib.parse import ParseResult, unquote 

10from urllib.request import url2pathname 

11 

12import paramiko 

13 

14from ..prompt import prompt 

15from . import ( 

16 SYNC_LOGGER, 

17 Op, 

18 diff, 

19 file, 

20 filter_contents, 

21 generate_sync_report, 

22 is_identical, 

23) 

24 

25 

@contextmanager
def connect(
    uri: ParseResult, timeout: float | None = None
) -> Generator[paramiko.sftp_client.SFTPClient, None, None]:
    """Yield an SFTPClient connected to the server specified by the given URI

    Parameters
    ----------
    uri : ParseResult
        The URI of the EnderChest to connect to. The hostname defaults to
        "localhost" and the port to 22 when the URI does not specify them.
    timeout : float, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.

    Yields
    ------
    SFTPClient
        A Paramiko SFTP client connected to the specified server

    Raises
    ------
    ValueError
        If the password entered at the prompt is rejected by the server

    Notes
    -----
    - Key-based (passwordless) login is attempted first; the user is only
      prompted for a password if that attempt fails authentication.
    - Any other exception raised while connecting is propagated unchanged.
    - Both the SFTP session and the underlying SSH connection are closed when
      the context exits, including when the body of the `with` block raises
      or when the connection attempt itself fails.
    """
    ssh_client = paramiko.client.SSHClient()
    # NOTE(review): AutoAddPolicy trusts any previously unseen host key without
    # verification. Left as-is to preserve behavior, but worth confirming that
    # this is an accepted trade-off.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    extra_kwargs: dict[str, Any] = {}
    if timeout is not None:
        extra_kwargs["timeout"] = timeout

    try:
        try:
            ssh_client.connect(
                uri.hostname or "localhost",
                port=uri.port or 22,
                username=uri.username,
                # note: passing in password is explicitly unsupported
                **extra_kwargs,
            )
        except paramiko.AuthenticationException:
            target = ((uri.username + "@") if uri.username else "") + (
                uri.hostname or "localhost"
            )

            SYNC_LOGGER.warning(
                f"This machine is not set up for passwordless login to {target}"
                "\nFor instructions on setting up public key-based authentication,"
                " which is both"
                "\nmore convenient and more secure, see:"
                "\nhttps://openbagtwo.github.io/EnderChest"
                "/dev/suggestions/#passwordless-ssh-authentication"
            )
            password = prompt(f"Please enter the password for {target}", is_password=True)
            try:
                ssh_client.connect(
                    uri.hostname or "localhost",
                    port=uri.port or 22,
                    username=uri.username,
                    password=password,
                    **extra_kwargs,
                )
            except paramiko.AuthenticationException as bad_login:
                raise ValueError(
                    "Authentication failed."
                    " Did you supply the correct username and password?"
                ) from bad_login

        sftp_client = ssh_client.open_sftp()
        try:
            yield sftp_client
        finally:
            # release the SFTP channel even if the caller's block raised
            sftp_client.close()
    finally:
        # always tear down the SSH connection, including on failed logins
        ssh_client.close()

101 

102 

def download_file(
    client: paramiko.sftp_client.SFTPClient,
    remote_loc: str,
    local_path: Path,
    remote_stat: paramiko.SFTPAttributes,
) -> None:
    """Download a file from a remote SFTP server and save it at the specified
    location.

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    remote_loc : str
        The POSIX path of the file to download
    local_path : Path
        The path to locally save the file
    remote_stat : stat-like
        The `os.stat_result`-like properties of the remote object

    Notes
    -----
    This is a wrapper around `client.get()` that can handle symlinks and
    updating timestamps. It does not check if either path is valid, points
    to a file, lives in an existing folder, etc.

    - A remote symlink is recreated locally as a symlink pointing at the same
      target string; its contents are not downloaded and no timestamps are set.
    - For regular files, the local access and modification times are set to
      the remote's values whenever the remote reports both of them.
    """
    if stat.S_ISLNK(remote_stat.st_mode or 0):
        local_path.symlink_to(Path((client.readlink(remote_loc) or "")))
    else:
        client.get(remote_loc, local_path)
        # Compare against None rather than relying on truthiness so that a
        # legitimate timestamp of 0 (the epoch) is still copied over.
        if remote_stat.st_atime is not None and remote_stat.st_mtime is not None:
            os.utime(
                local_path,
                times=(remote_stat.st_atime, remote_stat.st_mtime),
            )

138 

139 

def upload_file(
    client: paramiko.sftp_client.SFTPClient,
    local_path: Path,
    remote_loc: str,
) -> None:
    """Upload a local file to a remote SFTP server

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    local_path : Path
        The path of the file to upload
    remote_loc : str
        The POSIX path for the remote location to save the file

    Notes
    -----
    This is just a wrapper around `client.put()` that can handle symlinks.
    It does not check if either path is valid, points to a file, lives in an
    existing folder, etc.

    - A local symlink is recreated remotely as a symlink pointing at the same
      target (expressed as a POSIX path); its contents are not uploaded.
    - For regular files, the remote access and modification times are set to
      match the local file's.
    """
    if local_path.is_symlink():
        client.symlink(local_path.readlink().as_posix(), remote_loc)
    else:
        client.put(local_path, remote_loc)
        # Take a single stat snapshot so that the access and modification
        # times sent to the remote come from the same observation.
        local_stat = local_path.stat()
        client.utime(remote_loc, times=(local_stat.st_atime, local_stat.st_mtime))

169 

170 

def rglob(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """Walk a remote directory tree and collect everything found inside it

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        One entry for every file, folder and symlink located beneath the
        specified path

    Notes
    -----
    - The paths returned are *absolute*
    - The walk is depth-first: the contents of a directory are listed
      immediately after the directory itself
    - The `filename` attribute of each returned `SFTPAttributes` is overwritten
      with the entry's full remote path
    """
    SYNC_LOGGER.debug(f"ls {path}")
    found: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for entry in client.listdir_attr(path):
        full_remote_path = posixpath.join(path, entry.filename)
        entry.filename = full_remote_path
        found.append((Path(url2pathname(full_remote_path)), entry))
        is_directory = stat.S_ISDIR(entry.st_mode or 0)
        if is_directory:
            # descend right away so children directly follow their parent
            found += rglob(client, full_remote_path)
    return found

203 

204 

def get_contents(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """List everything beneath a remote directory, keyed by relative path

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        The attributes of all files, folders and symlinks found under the
        specified path

    Notes
    -----
    - Entries come from `rglob()` and keep its depth-first ordering, so every
      parent directory precedes its children
    - The paths returned are relative to the provided path
    """
    absolute_entries = rglob(client, path)
    # local-path form of the scan root, used to relativize each entry
    root = url2pathname(path)
    relative_entries: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for absolute_path, attributes in absolute_entries:
        relative_entries.append((absolute_path.relative_to(root), attributes))
    return relative_entries

233 

234 

235def pull( 

236 remote_uri: ParseResult, 

237 local_path: Path, 

238 exclude: Collection[str], 

239 dry_run: bool, 

240 timeout: float | None = None, 

241 delete: bool = True, 

242 **unsupported_kwargs, 

243) -> None: 

244 """Sync an upstream file or folder into the specified location SFTP. 

245 This will overwrite any files and folders already at the destination. 

246 

247 Parameters 

248 ---------- 

249 remote_uri : ParseResult 

250 The URI for the remote resource to copy from 

251 local_path : Path 

252 The destination folder 

253 exclude : list of str 

254 Any patterns that should be excluded from the sync 

255 dry_run : bool 

256 Whether to only simulate this sync (report the operations to be performed 

257 but not actually perform them) 

258 timeout : float, optional 

259 The number of seconds to wait before timing out the sync operation. 

260 If None is provided, no explicit timeout value will be set. 

261 delete : bool 

262 Whether part of the syncing should include deleting files at the destination 

263 that aren't at the source. Default is True. 

264 **unsupported_kwargs 

265 Any other provided options will be ignored 

266 

267 Raises 

268 ------ 

269 FileNotFoundError 

270 If the destination folder does not exist, or if the remote path 

271 does not exist 

272 OSError 

273 If the remote path cannot be accessed for any other reason (permissions, 

274 most likely) 

275 

276 Notes 

277 ----- 

278 - If the destination folder does not already exist, this method will not 

279 create it or its parent directories. 

280 """ 

281 if not local_path.exists(): 

282 raise FileNotFoundError(f"{local_path} does not exist") 

283 if unsupported_kwargs: 

284 SYNC_LOGGER.debug( 

285 "The following command-line options are ignored for this protocol:\n%s", 

286 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()), 

287 ) 

288 

289 remote_loc = posixpath.normpath(unquote(remote_uri.path)) 

290 destination_path = local_path / posixpath.basename(remote_loc) 

291 

292 if destination_path.is_symlink() and not destination_path.is_dir(): 

293 SYNC_LOGGER.warning("Removing symlink %s", destination_path) 

294 if not dry_run: 

295 destination_path.unlink() 

296 else: 

297 SYNC_LOGGER.debug( 

298 "And replacing it entirely with the remote's %s", remote_loc 

299 ) 

300 return 

301 elif destination_path.exists() and not destination_path.is_dir(): 

302 SYNC_LOGGER.warning("Deleting file %s", destination_path) 

303 if not dry_run: 

304 destination_path.unlink() 

305 else: 

306 SYNC_LOGGER.debug( 

307 "And replacing it entirely with the remote's %s", remote_loc 

308 ) 

309 return 

310 

311 with connect(uri=remote_uri, timeout=timeout) as remote: 

312 try: 

313 source_target = remote.lstat(remote_loc) 

314 except OSError as bad_target: 

315 raise type(bad_target)( 

316 f"Could not access {remote_loc} on remote: {bad_target}" 

317 ) 

318 if not stat.S_ISDIR(source_target.st_mode or 0): 

319 if destination_path.exists() and is_identical( 

320 source_target, destination_path.stat() 

321 ): 

322 SYNC_LOGGER.warning( 

323 "Remote file matches %s. No transfer needed.", 

324 destination_path, 

325 ) 

326 return 

327 SYNC_LOGGER.debug( 

328 "Downloading file %s from remote", 

329 destination_path, 

330 ) 

331 if not dry_run: 

332 download_file( 

333 remote, 

334 remote_loc, 

335 destination_path, 

336 source_target, 

337 ) 

338 return 

339 

340 if not destination_path.exists(): 

341 SYNC_LOGGER.debug( 

342 "Downloading the entire contents of the remote's %s", remote_loc 

343 ) 

344 if dry_run: 

345 return 

346 destination_path.mkdir() 

347 

348 source_contents = filter_contents( 

349 get_contents(remote, remote_loc), 

350 exclude, 

351 prefix=remote_loc, 

352 ) 

353 destination_contents = filter_contents( 

354 file.get_contents(destination_path), 

355 exclude, 

356 prefix=destination_path, 

357 ) 

358 

359 sync_diff = diff(source_contents, destination_contents) 

360 

361 if dry_run: 

362 generate_sync_report(sync_diff) 

363 return 

364 

365 ignore = file.ignore_patterns(*exclude) 

366 for path, path_stat, operation in sync_diff: 

367 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)): 

368 case (Op.CREATE, True): 

369 SYNC_LOGGER.debug("Creating directory %s", destination_path / path) 

370 (destination_path / path).mkdir(parents=True, exist_ok=True) 

371 case (Op.CREATE, False) | (Op.REPLACE, False): 

372 SYNC_LOGGER.debug( 

373 "Downloading file %s from remote", 

374 destination_path / path, 

375 ) 

376 (destination_path / path).unlink(missing_ok=True) 

377 download_file( 

378 remote, 

379 posixpath.join(remote_loc, path.as_posix()), 

380 destination_path / path, 

381 path_stat, # type: ignore[arg-type] 

382 ) 

383 case (Op.DELETE, True): 

384 # recall that for deletions, it's the *destination's* stats 

385 if delete: 

386 file.clean(destination_path / path, ignore, dry_run) 

387 case (Op.DELETE, False): 

388 SYNC_LOGGER.debug("Deleting file %s", destination_path / path) 

389 if delete: 

390 (destination_path / path).unlink() 

391 case op, is_dir: # pragma: no cover 

392 raise NotImplementedError( 

393 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}" 

394 ) 

395 

396 

397def push( 

398 local_path: Path, 

399 remote_uri: ParseResult, 

400 exclude: Collection[str], 

401 dry_run: bool, 

402 timeout: float | None = None, 

403 delete: bool = True, 

404 **unsupported_kwargs, 

405) -> None: 

406 """Sync a local file or folder into the specified location using SFTP. 

407 This will overwrite any files and folders already at the destination. 

408 

409 Parameters 

410 ---------- 

411 local_path : Path 

412 The file or folder to copy 

413 remote_uri : ParseResult 

414 The URI for the remote location to copy into 

415 exclude : list of str 

416 Any patterns that should be excluded from the sync 

417 dry_run : bool 

418 Whether to only simulate this sync (report the operations to be performed 

419 but not actually perform them) 

420 timeout : float, optional 

421 The number of seconds to wait before timing out the sync operation. 

422 If None is provided, no explicit timeout value will be set. 

423 delete : bool, optional 

424 Whether part of the syncing should include deleting files at the destination 

425 that aren't at the source. Default is True. 

426 **unsupported_kwargs 

427 Any other provided options will be ignored 

428 

429 Notes 

430 ----- 

431 - If the destination folder does not already exist, this method will not 

432 create it or its parent directories. 

433 """ 

434 if not local_path.exists(): 

435 raise FileNotFoundError(f"{local_path} does not exist.") 

436 if unsupported_kwargs: 

437 SYNC_LOGGER.debug( 

438 "The following command-line options are ignored for this protocol:\n%s", 

439 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()), 

440 ) 

441 

442 remote_parent = posixpath.normpath(unquote(remote_uri.path)) 

443 

444 with connect(uri=remote_uri, timeout=timeout) as remote: 

445 try: 

446 remote_folder_stat = remote.lstat(remote_parent) 

447 except OSError as bad_target: 

448 raise type(bad_target)( 

449 f"Could not access {remote_parent} on remote: {bad_target}" 

450 ) 

451 if not stat.S_ISDIR(remote_folder_stat.st_mode or 0): 

452 raise NotADirectoryError(f"{remote_parent} on remote is not a directory.") 

453 

454 remote_loc = posixpath.join(remote_parent, local_path.name) 

455 try: 

456 target_stat = remote.lstat(remote_loc) 

457 except FileNotFoundError: 

458 target_stat = None 

459 if not stat.S_ISDIR(local_path.stat().st_mode or 0): 

460 if target_stat and is_identical(local_path.stat(), target_stat): 

461 SYNC_LOGGER.warning("Remote file matches %s", local_path) 

462 return 

463 

464 SYNC_LOGGER.debug( 

465 "Uploading file %s to remote", 

466 local_path, 

467 ) 

468 if not dry_run: 

469 upload_file(remote, local_path, remote_loc) 

470 return 

471 if not target_stat: 

472 SYNC_LOGGER.debug("Uploading the entire contents %s", local_path) 

473 if dry_run: 

474 return 

475 remote.mkdir(remote_loc) 

476 elif not stat.S_ISDIR(target_stat.st_mode or 0): 

477 SYNC_LOGGER.warning( 

478 "Deleting remote file or symlink %s", 

479 remote_loc, 

480 ) 

481 if dry_run: 

482 SYNC_LOGGER.debug("And replacing it entirely with %s", local_path) 

483 return 

484 remote.remove(remote_loc) 

485 remote.mkdir(remote_loc) 

486 

487 source_contents = filter_contents( 

488 file.get_contents(local_path), exclude, prefix=local_path 

489 ) 

490 destination_contents = filter_contents( 

491 get_contents(remote, remote_loc), 

492 exclude, 

493 prefix=remote_loc, 

494 ) 

495 

496 sync_diff = diff(source_contents, destination_contents) 

497 

498 if dry_run: 

499 generate_sync_report(sync_diff) 

500 return 

501 

502 for path, path_stat, operation in sync_diff: 

503 posix_path = posixpath.join(remote_loc, path.as_posix()) 

504 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)): 

505 case (Op.CREATE, True): 

506 SYNC_LOGGER.debug("Creating remote directory %s", posix_path) 

507 remote.mkdir(posix_path) 

508 case (Op.CREATE, False) | (Op.REPLACE, False): 

509 SYNC_LOGGER.debug( 

510 "Uploading file %s to remote", 

511 local_path / path, 

512 ) 

513 try: 

514 remote.remove(posix_path) 

515 except FileNotFoundError: 

516 pass 

517 upload_file( 

518 remote, 

519 local_path / path, 

520 posix_path, 

521 ) 

522 case (Op.DELETE, True): 

523 # recall that for deletions, it's the *destination's* stats 

524 if delete: 

525 remote.rmdir(posix_path) 

526 case (Op.DELETE, False): 

527 if delete: 

528 SYNC_LOGGER.debug("Deleting remote file %s", posix_path) 

529 remote.remove(posix_path) 

530 case op, is_dir: # pragma: no cover 

531 raise NotImplementedError( 

532 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}" 

533 )