-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsync.py
executable file
·312 lines (242 loc) · 10.8 KB
/
sync.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
#!/usr/bin/env python3
import argparse
import copy
import datetime
import hashlib
import json
import logging
import os
import time
import xmlrpc.client
from pathlib import Path
logger = logging.getLogger('Sync')
class Phone:
    """Thin wrapper around the phone's XML-RPC endpoint.

    Unknown attribute lookups are delegated to the underlying
    ServerProxy, so remote procedures can be called directly as
    methods on the Phone instance.
    """
    def __init__(self, url):
        self.url = url
        # allow_none so Python None round-trips through XML-RPC.
        self.client = xmlrpc.client.ServerProxy(url, allow_none=True)
    def __getattr__(self, name):
        # Only called for attributes not found on the instance itself.
        if hasattr(self.client, name):
            return getattr(self.client, name)
        # The original fell through and implicitly returned None, which
        # made hasattr(phone, anything) always True; raise per protocol.
        raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")
class Storage:
    """Maps phone assets to local data files and JSON metadata sidecars.

    `path` and `metadata_path` are format strings evaluated against the
    asset metadata augmented with date components (see metadata_for_path).
    """
    def __init__(self, dir, path, metadata_path):
        # `dir` shadows the builtin, but it is part of the public keyword
        # interface (callers pass dir=...), so the name is kept.
        self.dir = dir
        self.path = path                    # format string for data files
        self.metadata_path = metadata_path  # format string for sidecars

    @staticmethod
    def metadata_for_path(asset):
        """Return a shallow copy of `asset` augmented with Y_create, m_create,
        Y_mod and m_mod keys (UTC year/month strings) for path formatting."""
        z = copy.copy(asset)
        formats = ("Y", "m")
        for (suffix, key) in (("create", "creation_date"), ("mod", "modification_date")):
            t = asset[key]
            # Timezone-aware replacement for deprecated utcfromtimestamp;
            # produces the same UTC components.
            stamp = datetime.datetime.fromtimestamp(t, tz=datetime.timezone.utc)
            for f in formats:
                z[f + "_" + suffix] = stamp.strftime('%' + f)
        return z

    def get_path(self, asset):
        """Full path where this asset's data is (to be) stored."""
        m = self.metadata_for_path(asset)
        return os.path.join(self.dir, self.path.format(**m))

    def get_metadata_path(self, asset):
        """Full path of this asset's metadata sidecar; extension forced to .json."""
        m = self.metadata_for_path(asset)
        p = Path(os.path.join(self.dir, self.metadata_path.format(**m)))
        return p.with_suffix('.json')

    @staticmethod
    def _digest_and_size(path):
        """Read the file at `path` and return (md5 hexdigest, size in bytes)."""
        with open(path, "rb") as f:
            blob = f.read()
        return hashlib.md5(blob).hexdigest(), len(blob)

    def files_to_sync(self, on_phone):
        """Return the assets from `on_phone` that are missing locally or whose
        stored modification_date differs from the phone's."""
        to_sync = []
        for asset in on_phone:
            path_to_metadata = self.get_metadata_path(asset)
            if not os.path.isfile(path_to_metadata):
                logger.debug(f'Syncing {asset["local_id"]} because missing.')
                to_sync.append(asset)
                continue
            # Sidecar exists; re-sync only if the modification date changed.
            with open(path_to_metadata) as f:
                data = json.load(f)
            if data["modification_date"] != asset["modification_date"]:
                logger.debug(f'Syncing {asset["local_id"]} modification_date differs.')
                to_sync.append(asset)
                continue
            # Nothing to do.
            logger.debug(f'Skipping {asset["local_id"]} already got it.')
        return to_sync

    def load_from_disk(self, asset):
        """Load stored metadata for `asset` and add the current local file's
        size and md5 (used as deletion proof by the phone)."""
        with open(self.get_metadata_path(asset)) as f:
            data = json.load(f)
        digest, size = self._digest_and_size(self.get_path(asset))
        data["_filesize"] = size
        data["_md5"] = digest
        return data

    def retrieve(self, p, asset):
        """Download `asset` from phone `p`, verify size and md5 against the
        transport metadata, write the data file and the metadata sidecar.

        Returns the raw response dict.
        Raises RuntimeError on an integrity mismatch (was BaseException).
        """
        get_path = self.get_path(asset)
        path_to_metadata = self.get_metadata_path(asset)
        logger.debug(f'Retrieving id: {asset["local_id"]} modified at {asset["modification_date"]}')
        retrieved = p.retrieve_asset_by_local_id(asset["local_id"])
        logger.debug(f' Retrieved {len(retrieved["_data"].data)} bytes')
        # Ensure directories exist.
        os.makedirs(os.path.dirname(get_path), exist_ok=True)
        os.makedirs(os.path.dirname(path_to_metadata), exist_ok=True)
        # Write the actual data.
        with open(get_path, "wb") as f:
            f.write(retrieved["_data"].data)
        # Read it back to verify what actually landed on disk.
        h, size = self._digest_and_size(get_path)
        logger.debug(f' Data size: {size}')
        logger.debug(f' _filesize: {retrieved["_filesize"]}')
        if size != retrieved["_filesize"]:
            # Original message had actual/expected swapped.
            raise RuntimeError(
                f"File size incorrect for {get_path}, got {size}, "
                f"expected {retrieved['_filesize']}.")
        expected = retrieved["_md5"]
        logger.debug(f' md5: {h}')
        logger.debug(f' _md5: {expected}')
        if h != expected:
            raise RuntimeError(f"Md5 does not match! Got {h} for {get_path}, expected {expected}")
        # File verified; persist metadata minus transport-only "_" keys.
        clean_metadata = {k: v for k, v in retrieved.items() if not k.startswith("_")}
        logger.debug(' Writing metadata.')
        with open(path_to_metadata, "w") as f:
            json.dump(clean_metadata, f)
        return retrieved
def run_sync(args):
    """Download every asset that is missing locally or stale.

    args must provide: host, dir, path, metadata_path.
    """
    logger.info('Running sync.')
    logger.debug(f' host: {args.host}')
    logger.debug(f' dir: {args.dir}')
    logger.debug(f' path: {args.path}')
    logger.debug(f' metadata_path: {args.metadata_path}')
    p = Phone(args.host)
    sync = Storage(dir=args.dir, path=args.path, metadata_path=args.metadata_path)
    on_phone = p.get_all_metadata()
    logger.info(f"On phone: {len(on_phone)}")
    to_sync = sync.files_to_sync(on_phone)
    logger.info(f"To sync : {len(to_sync)}")
    total = len(to_sync)
    for i, asset in enumerate(to_sync, start=1):
        retrieved = sync.retrieve(p, asset)
        filename = retrieved["filename"]
        size = retrieved["_filesize"]
        # Timezone-aware replacement for deprecated utcfromtimestamp.
        stamp = datetime.datetime.fromtimestamp(retrieved["creation_date"],
                                                tz=datetime.timezone.utc)
        date = stamp.strftime('%Y-%m-%d %H:%M:%S')
        logger.info(f"{i: >5} / {total: >5}: {filename: >20} {date} ({size: >9} bytes)")
def run_test(args):
    """Ad-hoc smoke test: print the collections, all image metadata, and
    one retrieved asset from the phone at args.host."""
    p = Phone(args.host)
    print(p.client.get_asset_collections())
    metadata = p.client.get_all_metadata()
    img = [f for f in metadata if f["media_type"] == "image"]
    print(img)
    # Reuse the metadata already fetched instead of a second round-trip
    # (the original called get_all_metadata() twice).
    r = p.client.retrieve_asset_by_local_id(metadata[-1]["local_id"])
    print(r)
def run_delete(args):
    """Delete assets from the phone that are older than args.retain_duration
    (seconds) and not in any manually created album.

    The phone requires proof (stored metadata plus local file size and md5)
    for each asset before it deletes, so pruned assets must already be synced.
    """
    logger.info('Running deletion.')
    logger.debug(f' host: {args.host}')
    logger.debug(f' dir: {args.dir}')
    logger.debug(f' path: {args.path}')
    logger.debug(f' metadata_path: {args.metadata_path}')
    logger.debug(f' retain_duration: {args.retain_duration}')
    p = Phone(args.host)
    sync = Storage(dir=args.dir, path=args.path, metadata_path=args.metadata_path)
    # Obtain whatever we have on the phone.
    logger.info('Obtaining metadata from phone.')
    on_phone = p.get_all_metadata()
    logger.info(f'Total assets: {len(on_phone)}')
    # We're only interested in manually created albums; any photo in one of
    # those albums is always preserved.
    asset_collections = p.client.get_asset_collections()
    manual_albums = asset_collections["albums"]
    keep_photos = set()
    for album in manual_albums:
        for asset in album["assets"]:
            keep_photos.add(asset["local_id"])
    logger.info(f'Assets in albums: {len(keep_photos)}')
    to_prune = []
    # Iterate the photos on the phone, checking each against expiry.
    now = time.time()
    for asset in on_phone:
        staleness = now - asset["modification_date"]
        logger.debug(f'Considering {asset["local_id"]} with age {staleness} seconds.')
        if staleness < args.retain_duration:
            continue
        logger.debug(' is older than retain')
        # Single membership check (the original tested keep_photos twice).
        if asset["local_id"] in keep_photos:
            logger.debug(f" Preserving {asset['local_id']} {asset['filename']} because in keep.")
        else:
            logger.debug(f" {asset['filename']} marking for deletion")
            to_prune.append(asset)
    logger.info(f'To prune: {len(to_prune)}')
    # Prove to the phone that we hold the photo and metadata locally.
    logger.info('Calculating proof we have asset marked for deletion.')
    to_prune_proof = [sync.load_from_disk(asset) for asset in to_prune]
    logger.info(f'Obtained {len(to_prune_proof)} proofs.')
    # Finally tell the phone to remove these entries; it verifies the proof
    # and prompts the user.
    logger.info('Issuing deletion, phone will check proof and prompt.')
    p.delete_assets_by_metadata(to_prune_proof, args.ignore_integrity)
    logger.info('Done.')
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Retrieval")
    parser.add_argument('-v', '--verbosity', action="count", help="Increase verbosity,"
                        "nothing is warn/error only, -v is info, -vv is debug.")
    parser.add_argument("--host", default="http://$REPL_HOST:1338",
                        help="xmlrpc interface to connect to. Defaults to %(default)s")
    subparsers = parser.add_subparsers(dest="command")

    test_parser = subparsers.add_parser('test')
    test_parser.set_defaults(func=run_test)

    def add_storage_args(parse):
        # Storage options shared by the sync and delete subcommands.
        parse.add_argument("--dir", default="/tmp/storage", help="Directory to write output to.")
        parse.add_argument("--path", default="{Y_create}-{m_create}/(unknown)", help="Format to use when writing.")
        parse.add_argument("--metadata-path", default="{Y_create}-{m_create}/metadata/(unknown)",
                           help="Format to use when writing metadata, extension is replaced with .json.")

    sync_parser = subparsers.add_parser('sync')
    add_storage_args(sync_parser)
    sync_parser.set_defaults(func=run_sync)

    def sane_date_parser(v):
        """Parse a duration string like '30d', '2w' or '1m' into seconds."""
        day = 60 * 60 * 24
        week = 7 * day
        month = 31 * day
        if v.endswith("d"):
            scaling = day
        elif v.endswith("w"):
            scaling = week
        elif v.endswith("m"):
            scaling = month
        else:
            # ArgumentTypeError lets argparse report a clean usage error;
            # the original raised BaseException, producing a raw traceback.
            raise argparse.ArgumentTypeError(
                "Date should end with 'd' for days, 'w' for weeks, 'm' for months")
        return scaling * float(v[0:-1])

    delete_parser = subparsers.add_parser('delete', help="Remove files older than given duration and not in a manually created album.")
    add_storage_args(delete_parser)
    delete_parser.add_argument("--retain-duration", default="30d", type=sane_date_parser, help="Duration to keep. Default: %(default)s, d=day, w=week, m=month")
    delete_parser.add_argument("--ignore-integrity", default=False, action="store_true", help="Skip the integrity check.")
    delete_parser.set_defaults(func=run_delete)

    args = parser.parse_args()

    # WARN by default, -v is INFO, -vv (or more) is DEBUG. The original
    # compared with == 2, so -vvv silently fell back to WARN.
    verbosity = args.verbosity or 0
    if verbosity >= 2:
        logger.setLevel(logging.DEBUG)
    elif verbosity == 1:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.WARN)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    # The default --host contains a $REPL_HOST placeholder; substitute it
    # from the environment when available.
    if "REPL_HOST" in os.environ:
        args.host = args.host.replace("$REPL_HOST", os.environ["REPL_HOST"])

    # No subcommand given: show help and exit.
    if args.command is None:
        parser.print_help()
        parser.exit()
    args.func(args)