Coverage for enderchest/sync/sftp.py: 89%

180 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-06 16:00 +0000

1"""paramiko-based sftp sync implementation""" 

2import os 

3import posixpath 

4import stat 

5from contextlib import contextmanager 

6from pathlib import Path 

7from typing import Any, Collection, Generator 

8from urllib.parse import ParseResult, unquote 

9from urllib.request import url2pathname 

10 

11import paramiko 

12 

13from ..prompt import prompt 

14from . import ( 

15 SYNC_LOGGER, 

16 Op, 

17 diff, 

18 file, 

19 filter_contents, 

20 generate_sync_report, 

21 is_identical, 

22) 

23 

24 

@contextmanager
def connect(
    uri: ParseResult, timeout: float | None = None
) -> Generator[paramiko.sftp_client.SFTPClient, None, None]:
    """Yield an SFTPClient connected to the server specified by the given URI

    Parameters
    ----------
    uri : ParseResult
        The URI of the EnderChest to connect to. Only the hostname, port and
        username are used; a missing hostname falls back to "localhost" and
        a missing port falls back to 22.
    timeout : float, optional
        The number of seconds to wait before timing out the sync operation.
        If None is provided, no explicit timeout value will be set.

    Yields
    ------
    SFTPClient
        A Paramiko SFTP client connected to the specified server

    Raises
    ------
    ValueError
        If the password entered at the prompt is rejected by the server
        (reading `uri.port` may also raise this for a malformed port)

    Notes
    -----
    - Key-based (passwordless) login is attempted first. The user is only
      prompted for a password if that attempt is rejected.
    - Any other exception raised while connecting (for example because the
      server cannot be reached) is propagated unchanged.
    - Both the SFTP session and the underlying SSH connection are closed when
      the context exits, including when connecting fails or when the body of
      the `with` block raises.
    """
    ssh_client = paramiko.client.SSHClient()
    # NOTE(review): AutoAddPolicy accepts host keys from servers that have
    # never been seen before, which leaves first connections open to
    # man-in-the-middle attacks. Kept as-is to preserve behavior.
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    hostname = uri.hostname or "localhost"
    connect_kwargs: dict[str, Any] = {
        "port": uri.port or 22,
        "username": uri.username,
    }
    if timeout is not None:
        connect_kwargs["timeout"] = timeout

    try:
        try:
            # note: passing in a password via the URI is explicitly unsupported
            ssh_client.connect(hostname, **connect_kwargs)
        except paramiko.AuthenticationException:
            target = ((uri.username + "@") if uri.username else "") + hostname

            SYNC_LOGGER.warning(
                f"This machine is not set up for passwordless login to {target}"
                "\nFor instructions on setting up public key-based authentication,"
                " which is both"
                "\nmore convenient and more secure, see:"
                "\nhttps://openbagtwo.github.io/EnderChest"
                "/dev/suggestions/#passwordless-ssh-authentication"
            )
            password = prompt(
                f"Please enter the password for {target}", is_password=True
            )
            try:
                ssh_client.connect(hostname, password=password, **connect_kwargs)
            except paramiko.AuthenticationException as bad_login:
                raise ValueError(
                    "Authentication failed."
                    " Did you supply the correct username and password?"
                ) from bad_login

        sftp_client = ssh_client.open_sftp()
        try:
            yield sftp_client
        finally:
            # close the SFTP session even if the caller's block raised
            sftp_client.close()
    finally:
        # always release the SSH connection, including on failed logins
        ssh_client.close()

100 

101 

def download_file(
    client: paramiko.sftp_client.SFTPClient,
    remote_loc: str,
    local_path: Path,
    remote_stat: paramiko.SFTPAttributes,
) -> None:
    """Download a file from a remote SFTP server and save it at the specified
    location.

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    remote_loc : str
        The POSIX path of the file to download
    local_path : Path
        The path to locally save the file
    remote_stat : stat-like
        The `os.stat_result`-like properties of the remote object

    Notes
    -----
    - This is a wrapper around `client.get()` that can handle symlinks and
      updating timestamps. It does not check if either path is valid, points
      to a file, lives in an existing folder, etc.
    - A remote symlink is recreated locally as a symlink pointing at the same
      target; its contents are not downloaded and no timestamps are applied.
    - For regular files, the access and modification times are copied only
      when the server reported both of them. A reported value of 0 is treated
      as a real timestamp, not as a missing one.
    """
    if stat.S_ISLNK(remote_stat.st_mode or 0):
        local_path.symlink_to(Path(client.readlink(remote_loc) or ""))
        return

    client.get(remote_loc, local_path)

    # compare against None rather than relying on truthiness so that an
    # epoch (0) timestamp is still propagated
    if remote_stat.st_atime is not None and remote_stat.st_mtime is not None:
        os.utime(
            local_path,
            times=(remote_stat.st_atime, remote_stat.st_mtime),
        )

137 

138 

def upload_file(
    client: paramiko.sftp_client.SFTPClient,
    local_path: Path,
    remote_loc: str,
) -> None:
    """Upload a local file to a remote SFTP server

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    local_path : Path
        The path of the file to upload
    remote_loc : str
        The POSIX path for the remote location to save the file

    Notes
    -----
    - This is just a wrapper around `client.put()` that can handle symlinks.
      It does not check if either path is valid, points to a file, lives in an
      existing folder, etc.
    - A local symlink is recreated on the remote as a symlink with the same
      target (expressed as a POSIX path); nothing is transferred and no
      timestamps are set.
    - For regular files, the local access and modification times are applied
      to the uploaded copy.
    """
    if local_path.is_symlink():
        client.symlink(local_path.readlink().as_posix(), remote_loc)
        return

    client.put(local_path, remote_loc)

    # stat once so both timestamps come from the same snapshot of the file
    local_stat = local_path.stat()
    client.utime(remote_loc, times=(local_stat.st_atime, local_stat.st_mtime))

168 

169 

def rglob(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """Walk a remote directory tree and list everything found inside it

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        The attributes of all files, folders and symlinks found under the
        specified path

    Notes
    -----
    - The paths returned are *absolute*
    - The search is performed depth-first: each directory is immediately
      followed by its own contents
    - The `filename` of every returned attributes object is overwritten with
      the object's full remote path
    """
    found: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []

    # each stack frame is a directory still being read, paired with an
    # iterator over the entries that have not been visited yet
    SYNC_LOGGER.debug(f"ls {path}")
    pending = [(path, iter(client.listdir_attr(path)))]

    while pending:
        parent, entries = pending[-1]
        try:
            entry = next(entries)
        except StopIteration:
            # this directory is exhausted; resume its parent
            pending.pop()
            continue

        entry.filename = posixpath.join(parent, entry.filename)
        found.append((Path(url2pathname(entry.filename)), entry))

        if stat.S_ISDIR(entry.st_mode or 0):
            # descend right away so children directly follow their parent
            SYNC_LOGGER.debug(f"ls {entry.filename}")
            pending.append(
                (entry.filename, iter(client.listdir_attr(entry.filename)))
            )

    return found

202 

203 

def get_contents(
    client: paramiko.sftp_client.SFTPClient, path: str
) -> list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]]:
    """List everything under a remote directory, relative to that directory

    Parameters
    ----------
    client : Paramiko SFTP client
        An authenticated client connected to the remote server
    path : str
        The absolute path to scan

    Returns
    -------
    list of (Path, SFTPAttributes) tuples
        The attributes of all files, folders and symlinks found under the
        specified path

    Notes
    -----
    - The entries come from `rglob`, which lists each directory before its
      contents, so all parent directories appear before their children
    - The paths returned are relative to the provided path
    """
    root = url2pathname(path)
    relative_contents: list[tuple[Path, paramiko.sftp_attr.SFTPAttributes]] = []
    for absolute_path, attributes in rglob(client, path):
        relative_contents.append((absolute_path.relative_to(root), attributes))
    return relative_contents

232 

233 

234def pull( 

235 remote_uri: ParseResult, 

236 local_path: Path, 

237 exclude: Collection[str], 

238 dry_run: bool, 

239 timeout: float | None = None, 

240 delete: bool = True, 

241 **unsupported_kwargs, 

242) -> None: 

243 """Sync an upstream file or folder into the specified location SFTP. 

244 This will overwrite any files and folders already at the destination. 

245 

246 Parameters 

247 ---------- 

248 remote_uri : ParseResult 

249 The URI for the remote resource to copy from 

250 local_path : Path 

251 The destination folder 

252 exclude : list of str 

253 Any patterns that should be excluded from the sync 

254 dry_run : bool 

255 Whether to only simulate this sync (report the operations to be performed 

256 but not actually perform them) 

257 timeout : float, optional 

258 The number of seconds to wait before timing out the sync operation. 

259 If None is provided, no explicit timeout value will be set. 

260 delete : bool 

261 Whether part of the syncing should include deleting files at the destination 

262 that aren't at the source. Default is True. 

263 **unsupported_kwargs 

264 Any other provided options will be ignored 

265 

266 Raises 

267 ------ 

268 FileNotFoundError 

269 If the destination folder does not exist, or if the remote path 

270 does not exist 

271 OSError 

272 If the remote path cannot be accessed for any other reason (permissions, 

273 most likely) 

274 

275 Notes 

276 ----- 

277 - If the destination folder does not already exist, this method will not 

278 create it or its parent directories. 

279 """ 

280 if not local_path.exists(): 

281 raise FileNotFoundError(f"{local_path} does not exist") 

282 if unsupported_kwargs: 

283 SYNC_LOGGER.debug( 

284 "The following command-line options are ignored for this protocol:\n%s", 

285 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()), 

286 ) 

287 

288 remote_loc = posixpath.normpath(unquote(remote_uri.path)) 

289 destination_path = local_path / posixpath.basename(remote_loc) 

290 

291 if destination_path.is_symlink() and not destination_path.is_dir(): 

292 SYNC_LOGGER.warning("Removing symlink %s", destination_path) 

293 if not dry_run: 

294 destination_path.unlink() 

295 else: 

296 SYNC_LOGGER.debug( 

297 "And replacing it entirely with the remote's %s", remote_loc 

298 ) 

299 return 

300 elif destination_path.exists() and not destination_path.is_dir(): 

301 SYNC_LOGGER.warning("Deleting file %s", destination_path) 

302 if not dry_run: 

303 destination_path.unlink() 

304 else: 

305 SYNC_LOGGER.debug( 

306 "And replacing it entirely with the remote's %s", remote_loc 

307 ) 

308 return 

309 

310 with connect(uri=remote_uri, timeout=timeout) as remote: 

311 try: 

312 source_target = remote.lstat(remote_loc) 

313 except OSError as bad_target: 

314 raise type(bad_target)( 

315 f"Could not access {remote_loc} on remote: {bad_target}" 

316 ) 

317 if not stat.S_ISDIR(source_target.st_mode or 0): 

318 if destination_path.exists() and is_identical( 

319 source_target, destination_path.stat() 

320 ): 

321 SYNC_LOGGER.warning( 

322 "Remote file matches %s. No transfer needed.", 

323 destination_path, 

324 ) 

325 return 

326 SYNC_LOGGER.debug( 

327 "Downloading file %s from remote", 

328 destination_path, 

329 ) 

330 if not dry_run: 

331 download_file( 

332 remote, 

333 remote_loc, 

334 destination_path, 

335 source_target, 

336 ) 

337 return 

338 

339 if not destination_path.exists(): 

340 SYNC_LOGGER.debug( 

341 "Downloading the entire contents of the remote's %s", remote_loc 

342 ) 

343 if dry_run: 

344 return 

345 destination_path.mkdir() 

346 

347 source_contents = filter_contents( 

348 get_contents(remote, remote_loc), 

349 exclude, 

350 prefix=remote_loc, 

351 ) 

352 destination_contents = filter_contents( 

353 file.get_contents(destination_path), 

354 exclude, 

355 prefix=destination_path, 

356 ) 

357 

358 sync_diff = diff(source_contents, destination_contents) 

359 

360 if dry_run: 

361 generate_sync_report(sync_diff) 

362 return 

363 

364 ignore = file.ignore_patterns(*exclude) 

365 for path, path_stat, operation in sync_diff: 

366 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)): 

367 case (Op.CREATE, True): 

368 SYNC_LOGGER.debug("Creating directory %s", destination_path / path) 

369 (destination_path / path).mkdir(parents=True, exist_ok=True) 

370 case (Op.CREATE, False) | (Op.REPLACE, False): 

371 SYNC_LOGGER.debug( 

372 "Downloading file %s from remote", 

373 destination_path / path, 

374 ) 

375 (destination_path / path).unlink(missing_ok=True) 

376 download_file( 

377 remote, 

378 posixpath.join(remote_loc, path.as_posix()), 

379 destination_path / path, 

380 path_stat, # type: ignore[arg-type] 

381 ) 

382 case (Op.DELETE, True): 

383 # recall that for deletions, it's the *destination's* stats 

384 if delete: 

385 file.clean(destination_path / path, ignore, dry_run) 

386 case (Op.DELETE, False): 

387 SYNC_LOGGER.debug("Deleting file %s", destination_path / path) 

388 if delete: 

389 (destination_path / path).unlink() 

390 case op, is_dir: # pragma: no cover 

391 raise NotImplementedError( 

392 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}" 

393 ) 

394 

395 

396def push( 

397 local_path: Path, 

398 remote_uri: ParseResult, 

399 exclude: Collection[str], 

400 dry_run: bool, 

401 timeout: float | None = None, 

402 delete: bool = True, 

403 **unsupported_kwargs, 

404) -> None: 

405 """Sync a local file or folder into the specified location using SFTP. 

406 This will overwrite any files and folders already at the destination. 

407 

408 Parameters 

409 ---------- 

410 local_path : Path 

411 The file or folder to copy 

412 remote_uri : ParseResult 

413 The URI for the remote location to copy into 

414 exclude : list of str 

415 Any patterns that should be excluded from the sync 

416 dry_run : bool 

417 Whether to only simulate this sync (report the operations to be performed 

418 but not actually perform them) 

419 timeout : float, optional 

420 The number of seconds to wait before timing out the sync operation. 

421 If None is provided, no explicit timeout value will be set. 

422 delete : bool, optional 

423 Whether part of the syncing should include deleting files at the destination 

424 that aren't at the source. Default is True. 

425 **unsupported_kwargs 

426 Any other provided options will be ignored 

427 

428 Notes 

429 ----- 

430 - If the destination folder does not already exist, this method will not 

431 create it or its parent directories. 

432 """ 

433 if not local_path.exists(): 

434 raise FileNotFoundError(f"{local_path} does not exist.") 

435 if unsupported_kwargs: 

436 SYNC_LOGGER.debug( 

437 "The following command-line options are ignored for this protocol:\n%s", 

438 "\n".join(" {}: {}".format(*item) for item in unsupported_kwargs.items()), 

439 ) 

440 

441 remote_parent = posixpath.normpath(unquote(remote_uri.path)) 

442 

443 with connect(uri=remote_uri, timeout=timeout) as remote: 

444 try: 

445 remote_folder_stat = remote.lstat(remote_parent) 

446 except OSError as bad_target: 

447 raise type(bad_target)( 

448 f"Could not access {remote_parent} on remote: {bad_target}" 

449 ) 

450 if not stat.S_ISDIR(remote_folder_stat.st_mode or 0): 

451 raise NotADirectoryError(f"{remote_parent} on remote is not a directory.") 

452 

453 remote_loc = posixpath.join(remote_parent, local_path.name) 

454 try: 

455 target_stat = remote.lstat(remote_loc) 

456 except FileNotFoundError: 

457 target_stat = None 

458 if not stat.S_ISDIR(local_path.stat().st_mode or 0): 

459 if target_stat and is_identical(local_path.stat(), target_stat): 

460 SYNC_LOGGER.warning("Remote file matches %s", local_path) 

461 return 

462 

463 SYNC_LOGGER.debug( 

464 "Uploading file %s to remote", 

465 local_path, 

466 ) 

467 if not dry_run: 

468 upload_file(remote, local_path, remote_loc) 

469 return 

470 if not target_stat: 

471 SYNC_LOGGER.debug("Uploading the entire contents %s", local_path) 

472 if dry_run: 

473 return 

474 remote.mkdir(remote_loc) 

475 elif not stat.S_ISDIR(target_stat.st_mode or 0): 

476 SYNC_LOGGER.warning( 

477 "Deleting remote file or symlink %s", 

478 remote_loc, 

479 ) 

480 if dry_run: 

481 SYNC_LOGGER.debug("And replacing it entirely with %s", local_path) 

482 return 

483 remote.remove(remote_loc) 

484 remote.mkdir(remote_loc) 

485 

486 source_contents = filter_contents( 

487 file.get_contents(local_path), exclude, prefix=local_path 

488 ) 

489 destination_contents = filter_contents( 

490 get_contents(remote, remote_loc), 

491 exclude, 

492 prefix=remote_loc, 

493 ) 

494 

495 sync_diff = diff(source_contents, destination_contents) 

496 

497 if dry_run: 

498 generate_sync_report(sync_diff) 

499 return 

500 

501 for path, path_stat, operation in sync_diff: 

502 posix_path = posixpath.join(remote_loc, path.as_posix()) 

503 match (operation, stat.S_ISDIR(path_stat.st_mode or 0)): 

504 case (Op.CREATE, True): 

505 SYNC_LOGGER.debug("Creating remote directory %s", posix_path) 

506 remote.mkdir(posix_path) 

507 case (Op.CREATE, False) | (Op.REPLACE, False): 

508 SYNC_LOGGER.debug( 

509 "Uploading file %s to remote", 

510 local_path / path, 

511 ) 

512 try: 

513 remote.remove(posix_path) 

514 except FileNotFoundError: 

515 pass 

516 upload_file( 

517 remote, 

518 local_path / path, 

519 posix_path, 

520 ) 

521 case (Op.DELETE, True): 

522 # recall that for deletions, it's the *destination's* stats 

523 if delete: 

524 remote.rmdir(posix_path) 

525 case (Op.DELETE, False): 

526 if delete: 

527 SYNC_LOGGER.debug("Deleting remote file %s", posix_path) 

528 remote.remove(posix_path) 

529 case op, is_dir: # pragma: no cover 

530 raise NotImplementedError( 

531 f"Don't know how to handle {op} of {'directory' if is_dir else 'file'}" 

532 )