-
Notifications
You must be signed in to change notification settings - Fork 318
Expand file tree
/
Copy pathsortphotos.py
More file actions
executable file
·538 lines (420 loc) · 19.2 KB
/
sortphotos.py
File metadata and controls
executable file
·538 lines (420 loc) · 19.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
#!/usr/bin/env python
# encoding: utf-8
"""
sortphotos.py
Created on 3/2/2013
Copyright (c) S. Andrew Ning. All rights reserved.
"""
from __future__ import print_function
from __future__ import with_statement
import subprocess
import os
import sys
import shutil
try:
import json
except:
import simplejson as json
import filecmp
from datetime import datetime, timedelta
import re
import locale
# Adopt the locale configured in the user's environment (the '' argument).
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    # The environment names a locale that is not available on this system
    # (e.g. a stale LANG/LC_ALL value). Keep the interpreter's default locale
    # instead of failing before any work is done.
    sys.stderr.write('Warning: unsupported locale setting; using the default locale.\n')

# Path of the ExifTool script, expected in an Image-ExifTool directory next to this file.
exiftool_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Image-ExifTool', 'exiftool')
# -------- convenience methods -------------
def parse_date_exif(date_string):
    """
    extract date info from EXIF data
    YYYY:MM:DD HH:MM:SS
    or YYYY:MM:DD HH:MM:SS+HH:MM
    or YYYY:MM:DD HH:MM:SS-HH:MM
    or YYYY:MM:DD HH:MM:SSZ

    Parameters
    ---------------
    date_string : object
        raw tag value; it is converted with str() before being parsed

    Returns
    ---------------
    datetime or None
        naive datetime built from the value, or None when the value is empty,
        has no usable YYYY:MM:DD part, contains non-numeric fields, or does not
        form a valid date. When no time part is present the time defaults to
        noon. When a +HH:MM / -HH:MM offset is present, the signed offset
        (hours and minutes) is added to the parsed time.
    """

    # split into date and time
    elements = str(date_string).strip().split()  # ['YYYY:MM:DD', 'HH:MM:SS']

    if len(elements) < 1:
        return None

    # parse year, month, day
    date_entries = elements[0].split(':')  # ['YYYY', 'MM', 'DD']

    # check if three entries, nonzero data, and no decimal (which occurs for timestamps with only time but no date)
    if not (len(date_entries) == 3 and date_entries[0] > '0000' and '.' not in ''.join(date_entries)):
        return None

    # defaulting to noon if no time data provided
    hour, minute, second = 12, 0, 0
    dateadd = None  # time-zone offset to apply, if one was supplied

    try:
        year = int(date_entries[0])
        month = int(date_entries[1])
        day = int(date_entries[2])

        # parse hour, min, second
        if len(elements) > 1:
            time_entries = re.split(r'(\+|-|Z)', elements[1])  # ['HH:MM:SS', '+', 'HH:MM']
            time = time_entries[0].split(':')  # ['HH', 'MM', 'SS']

            if len(time) == 3:
                hour = int(time[0])
                minute = int(time[1])
                second = int(time[2].split('.')[0])  # drop fractional seconds
            elif len(time) == 2:
                hour = int(time[0])
                minute = int(time[1])

            # adjust for time-zone if needed
            if len(time_entries) > 2:
                time_zone = time_entries[2].split(':')  # ['HH', 'MM']

                if len(time_zone) == 2:
                    time_zone_hour = int(time_zone[0])
                    time_zone_min = int(time_zone[1])

                    # a negative offset applies to the minutes as well as the hours
                    if time_entries[1] == '-':
                        time_zone_hour = -time_zone_hour
                        time_zone_min = -time_zone_min

                    dateadd = timedelta(hours=time_zone_hour, minutes=time_zone_min)
    except ValueError:
        return None  # non-numeric date/time/offset fields

    # form date object
    try:
        date = datetime(year, month, day, hour, minute, second)
    except ValueError:
        return None  # errors in time format

    # try converting it (some "valid" dates are way before 1900 and cannot be parsed by strtime later)
    try:
        date.strftime('%Y/%m-%b')  # any format with year, month, day, would work here.
    except ValueError:
        return None  # errors in time format

    # adjust for time zone if necessary
    if dateadd is not None:
        try:
            date += dateadd
        except OverflowError:
            return None  # offset pushes the value outside the representable range

    return date
def get_oldest_timestamp(data, additional_groups_to_ignore, additional_tags_to_ignore, print_all_tags=False):
    """
    find the oldest timestamp among the tags of one file

    Parameters
    ---------------
    data : dict
        data as dictionary from json. Should contain only time stamps except SourceFile
    additional_groups_to_ignore : list(str)
        tag groups (the part of the key before the first ':') to skip, in addition to ICC_Profile
    additional_tags_to_ignore : list(str)
        full tag names to skip, in addition to SourceFile and XMP:HistoryWhen
    print_all_tags : bool
        True to print every tag that is considered together with its raw value

    Returns
    ---------------
    (src_file, oldest_date, oldest_keys)
        src_file is data['SourceFile']; oldest_date is the oldest parsed datetime or
        None if no tag produced a usable date; oldest_keys lists the tags carrying that date.
        Only dates older than the current time are accepted, so timestamps in the
        future are never selected.
    """

    # save src file
    src_file = data['SourceFile']

    # setup tags to ignore
    ignore_groups = ['ICC_Profile'] + list(additional_groups_to_ignore)
    ignore_tags = ['SourceFile', 'XMP:HistoryWhen'] + list(additional_tags_to_ignore)

    # save only the oldest date; "now" is the upper bound a candidate has to beat
    date_available = False
    oldest_date = datetime.now()
    oldest_keys = []

    if print_all_tags:
        print('All relevant tags:')

    # run through all keys
    for key in data:

        # skip ignored tags, ignored groups, and anything GPS related
        if key in ignore_tags or key.split(':')[0] in ignore_groups or 'GPS' in key:
            continue

        date = data[key]

        if print_all_tags:
            print(str(key) + ', ' + str(date))

        # (rare) check if multiple dates returned in a list, take the first one which is the oldest
        if isinstance(date, list):
            if not date:
                continue  # an empty list carries no date at all
            date = date[0]

        try:
            exifdate = parse_date_exif(date)  # check for poor-formed exif data, but allow continuation
        except Exception:
            exifdate = None

        if not exifdate:
            continue

        if exifdate < oldest_date:
            date_available = True
            oldest_date = exifdate
            oldest_keys = [key]
        elif exifdate == oldest_date:
            oldest_keys.append(key)

    if not date_available:
        oldest_date = None

    if print_all_tags:
        print()

    return src_file, oldest_date, oldest_keys
def check_for_early_morning_photos(date, day_begins):
    """return the date to classify a photo under, moving photos taken before day_begins to the previous day"""

    photo_hour = date.hour
    if photo_hour >= day_begins:
        return date  # taken after the day has started: nothing to adjust

    print('moving this photo to the previous day for classification purposes (day_begins=' + str(day_begins) + ')')

    # going back one hour more than the photo's own hour always lands on the previous day
    shift = timedelta(hours=photo_hour + 1)
    return date - shift
def process_AAE_sidecar(src, dst, action):
    """copy or move the iOS .AAE sidecar belonging to src (if there is one) so that it sits next to dst"""

    def sidecar_for(path):
        # same directory and base name as path, with the extension replaced by .AAE
        folder, leaf = os.path.split(path)
        stem = os.path.splitext(leaf)[0]
        return os.path.join(folder, stem + ".AAE")

    aae_source = sidecar_for(src)

    # no sidecar next to the source file: nothing to do
    if not os.path.exists(aae_source):
        return

    aae_target = sidecar_for(dst)
    print("AAE_path => " + aae_source + " To " + aae_target)

    # any action other than "copy" or "move" leaves the sidecar where it is
    if action == "copy":
        shutil.copy2(aae_source, aae_target)
    elif action == "move":
        shutil.move(aae_source, aae_target)
# this class is based on code from Sven Marnach (http://stackoverflow.com/questions/10075115/call-exiftool-from-a-python-script)
class ExifTool(object):
    """used to run ExifTool from Python and keep it open

    Use as a context manager: entering starts `perl <executable> -stay_open True -@ -`
    with piped stdin/stdout, execute()/get_metadata() send requests over stdin,
    and leaving the block tells the process to stop and waits for it.
    """

    # marker that ends the output of each request
    sentinel = "{ready}"

    def __init__(self, executable=exiftool_location, verbose=False):
        """
        executable : str
            path of the exiftool script handed to perl
        verbose : bool
            True to echo everything ExifTool writes to stdout
        """
        self.executable = executable
        self.verbose = verbose

    def __enter__(self):
        """start the ExifTool process and return self"""
        self.process = subprocess.Popen(
            ['perl', self.executable, "-stay_open", "True", "-@", "-"],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ask ExifTool to stop, then close the pipes and wait for the process"""
        try:
            self.process.stdin.write(b'-stay_open\nFalse\n')
            self.process.stdin.flush()
        except (IOError, OSError):
            # the process is already gone (e.g. it failed to start properly),
            # so there is nobody left to notify
            pass
        # closes stdin, drains stdout and reaps the child so nothing is left behind
        self.process.communicate()

    def execute(self, *args):
        """
        send one request (the given arguments followed by -execute) and return its output

        Returns the text written by ExifTool up to, but not including, the sentinel.
        Raises RuntimeError if ExifTool closes its output before the sentinel arrives.
        """
        import codecs  # local import: only needed for the incremental decoder below

        args = args + ("-execute\n",)
        self.process.stdin.write("\n".join(args).encode('utf-8'))
        self.process.stdin.flush()

        # decode incrementally so a multi-byte character split across two reads
        # is reassembled instead of raising UnicodeDecodeError
        decoder = codecs.getincrementaldecoder('utf-8')()
        output = ""
        fd = self.process.stdout.fileno()
        while not output.rstrip(' \t\n\r').endswith(self.sentinel):
            increment = os.read(fd, 4096)
            if not increment:
                # end of stream: the process exited, so the sentinel will never come
                raise RuntimeError('ExifTool closed its output before finishing the request')
            text = decoder.decode(increment)
            if self.verbose:
                sys.stdout.write(text)
            output += text
        return output.rstrip(' \t\n\r')[:-len(self.sentinel)]

    def get_metadata(self, *args):
        """
        run a request and return its output parsed as JSON

        If the output cannot be parsed (ValueError), a message is printed and the
        program exits.
        """
        try:
            return json.loads(self.execute(*args))
        except ValueError:
            sys.stdout.write('No files to parse or invalid data\n')
            sys.exit()
# ---------------------------------------
def sortPhotos(src_dir, dest_dir, sort_format, rename_format, recursive=False,
               copy_files=False, test=False, remove_duplicates=True, day_begins=0,
               additional_groups_to_ignore=None, additional_tags_to_ignore=None,
               use_only_groups=None, use_only_tags=None, verbose=True, keep_filename=False, keep_AAE=False):
    """
    This function is a convenience wrapper around ExifTool based on common usage scenarios for sortphotos.py

    Parameters
    ---------------
    src_dir : str
        directory containing files you want to process
    dest_dir : str
        directory where you want to move/copy the files to
    sort_format : str
        date format code for how you want your photos sorted
        (https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior)
    rename_format : str
        date format code for how you want your files renamed
        (https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior)
        None to not rename file
    recursive : bool
        True if you want src_dir to be searched recursively for files (False to search only in top-level of src_dir)
    copy_files : bool
        True if you want files to be copied over from src_dir to dest_dir rather than moved
    test : bool
        True if you just want to simulate how the files will be moved without actually doing any moving/copying
        (no directories are created either)
    remove_duplicates : bool
        True to remove files that are exactly the same in name and a file hash
    keep_filename : bool
        True to append original filename in case of duplicates instead of increasing number
    day_begins : int
        what hour of the day you want the day to begin (only for classification purposes). Defaults at 0 as midnight.
        Can be used to group early morning photos with the previous day. must be a number between 0-23
    additional_groups_to_ignore : list(str)
        tag groups that will be ignored when searching for file data. By default (None) File is ignored
    additional_tags_to_ignore : list(str)
        specific tags that will be ignored when searching for file data. None means no additional tags
    use_only_groups : list(str)
        a list of groups that will be exclusively searched across for date info
    use_only_tags : list(str)
        a list of tags that will be exclusively searched across for date info
        (takes precedence over use_only_groups)
    verbose : bool
        True if you want to see details of file processing
    keep_AAE : bool
        True if you want to include iOS AAE sidecar files in the sort

    Raises
    ---------------
    Exception
        if src_dir does not exist
    """

    # resolve list defaults here so that no list object is shared between calls
    if additional_groups_to_ignore is None:
        additional_groups_to_ignore = ['File']
    if additional_tags_to_ignore is None:
        additional_tags_to_ignore = []

    # some error checking
    if not os.path.exists(src_dir):
        raise Exception('Source directory does not exist')

    # setup arguments to exiftool
    args = ['-j', '-a', '-G']

    # setup tags to ignore
    if use_only_tags is not None:
        additional_groups_to_ignore = []
        additional_tags_to_ignore = []
        args += ['-' + t for t in use_only_tags]

    elif use_only_groups is not None:
        additional_groups_to_ignore = []
        args += ['-' + g + ':Time:All' for g in use_only_groups]

    else:
        args += ['-time:all']

    if recursive:
        args += ['-r']

    args += [src_dir]

    # get all metadata
    with ExifTool(verbose=verbose) as e:
        print('Preprocessing with ExifTool. May take a while for a large number of files.')
        sys.stdout.flush()
        metadata = e.get_metadata(*args)

    # setup output to screen
    num_files = len(metadata)
    print()

    # test mode only: maps every simulated destination to the source file that claimed it
    test_file_dict = {}

    # parse output extracting oldest relevant date
    for idx, data in enumerate(metadata):

        # extract timestamp date for photo
        src_file, date, keys = get_oldest_timestamp(data, additional_groups_to_ignore, additional_tags_to_ignore)

        if verbose:
            # write out which photo we are at
            ending = ']'
            if test:
                ending = '] (TEST - no files are being moved/copied)'
            print('[' + str(idx+1) + '/' + str(num_files) + ending)
            print('Source: ' + src_file)
        else:
            # progress bar
            numdots = int(20.0*(idx+1)/num_files)
            sys.stdout.write('\r')
            sys.stdout.write('[%-20s] %d of %d ' % ('='*numdots, idx+1, num_files))
            sys.stdout.flush()

        # check if no valid date found
        if not date:
            if verbose:
                print('No valid dates were found using the specified tags. File will remain where it is.')
                print()
            continue

        # ignore hidden files
        if os.path.basename(src_file).startswith('.'):
            # only reported in verbose mode so the progress bar line is not broken up
            if verbose:
                print('hidden file. will be skipped')
                print()
            continue

        if verbose:
            print('Date/Time: ' + str(date))
            print('Corresponding Tags: ' + ', '.join(keys))

        # early morning photos can be grouped with previous day (depending on user setting)
        date = check_for_early_morning_photos(date, day_begins)

        # create folder structure (a test run must not touch the file system)
        dir_structure = date.strftime(sort_format)
        dest_file = dest_dir
        for thedir in dir_structure.split('/'):
            dest_file = os.path.join(dest_file, thedir)
            if not test and not os.path.exists(dest_file):
                os.makedirs(dest_file)

        # rename file if necessary
        filename = os.path.basename(src_file)

        if rename_format is not None:
            _, ext = os.path.splitext(filename)
            filename = date.strftime(rename_format) + ext.lower()

        # setup destination file (kept as text: joining str with bytes fails on Python 3)
        dest_file = os.path.join(dest_file, filename)
        root, ext = os.path.splitext(dest_file)

        if verbose:
            name = 'Destination '
            if copy_files:
                name += '(copy): '
            else:
                name += '(move): '
            print(name + dest_file)

        # check for collisions
        append = 1
        fileIsIdentical = False

        while True:

            # check for existing name: on disk for a real run, among simulated targets for a test run
            if test:
                name_taken = dest_file in test_file_dict
            else:
                name_taken = os.path.isfile(dest_file)

            if not name_taken:
                break

            dest_compare = test_file_dict[dest_file] if test else dest_file

            if remove_duplicates and filecmp.cmp(src_file, dest_compare):  # check for identical files
                fileIsIdentical = True
                if verbose:
                    print('Identical file already exists. Duplicate will be ignored.\n')
                break

            # name is same, but file is different
            if keep_filename:
                orig_filename = os.path.splitext(os.path.basename(src_file))[0]
                dest_file = root + '_' + orig_filename + '_' + str(append) + ext
            else:
                dest_file = root + '_' + str(append) + ext
            append += 1
            if verbose:
                print('Same name already exists...renaming to: ' + dest_file)

        # finally move or copy the file
        if test:
            test_file_dict[dest_file] = src_file

        elif fileIsIdentical:
            continue  # ignore identical files

        else:
            if copy_files:
                action = "copy"
                shutil.copy2(src_file, dest_file)
            else:
                action = "move"
                shutil.move(src_file, dest_file)

            # aae sidecar?
            if keep_AAE:
                process_AAE_sidecar(src_file, dest_file, action)

        if verbose:
            print()

    # finish the progress bar line
    if not verbose:
        print()
def main():
    """command line entry point: parse the arguments and hand them to sortPhotos"""
    import argparse

    # setup command line parsing
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
                                     description='Sort files (primarily photos and videos) into folders by date\nusing EXIF and other metadata')
    parser.add_argument('src_dir', type=str, help='source directory')
    parser.add_argument('dest_dir', type=str, help='destination directory')
    parser.add_argument('-r', '--recursive', action='store_true', help='search src_dir recursively')
    parser.add_argument('-c', '--copy', action='store_true', help='copy files instead of move')
    parser.add_argument('-s', '--silent', action='store_true', help='don\'t display parsing details.')
    parser.add_argument('-t', '--test', action='store_true', help='run a test. files will not be moved/copied\ninstead you will just a list of would happen')
    parser.add_argument('--sort', type=str, default='%Y/%m-%b',
                        help="choose destination folder structure using datetime format \n"
                             "https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior. \n"
                             "Use forward slashes / to indicate subdirectory(ies) (independent of your OS convention). \n"
                             "The default is '%%Y/%%m-%%b', which separates by year then month \n"
                             "with both the month number and name (e.g., 2012/02-Feb).")
    parser.add_argument('--rename', type=str, default=None,
                        help="rename file using format codes \n"
                             "https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior. \n"
                             "default is None which just uses original filename")
    parser.add_argument('--keep-filename', action='store_true',
                        help='In case of duplicated output filenames an increasing number and the original file name will be appended',
                        default=False)
    parser.add_argument('--keep-duplicates', action='store_true',
                        help='If file is a duplicate keep it anyway (after renaming).')
    parser.add_argument('--day-begins', type=int, default=0,
                        help='hour of day that new day begins (0-23), \n'
                             'defaults to 0 which corresponds to midnight. Useful for grouping pictures with previous day.')
    # the default matches the help text below and the default of sortPhotos:
    # file timestamps are ignored unless the user supplies their own list
    parser.add_argument('--ignore-groups', type=str, nargs='+',
                        default=['File'],
                        help='a list of tag groups that will be ignored for date informations.\n'
                             'list of groups and tags here: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/\n'
                             'by default the group \'File\' is ignored which contains file timestamp data')
    parser.add_argument('--ignore-tags', type=str, nargs='+',
                        default=[],
                        help='a list of tags that will be ignored for date informations.\n'
                             'list of groups and tags here: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/\n'
                             'the full tag name needs to be included (e.g., EXIF:CreateDate)')
    parser.add_argument('--use-only-groups', type=str, nargs='+',
                        default=None,
                        help='specify a restricted set of groups to search for date information\n'
                             'e.g., EXIF')
    parser.add_argument('--use-only-tags', type=str, nargs='+',
                        default=None,
                        help='specify a restricted set of tags to search for date information\n'
                             'e.g., EXIF:CreateDate')
    parser.add_argument('--keep-aae', action='store_true',
                        help='Include iOS .AAE sidecar files in sorted results',
                        default=False)

    # parse command line arguments
    args = parser.parse_args()

    # reject hours outside the documented range before any file is touched
    if not 0 <= args.day_begins <= 23:
        parser.error('--day-begins must be a number between 0 and 23')

    # options are passed by keyword so they cannot be misaligned with the signature
    sortPhotos(args.src_dir, args.dest_dir, args.sort, args.rename,
               recursive=args.recursive,
               copy_files=args.copy,
               test=args.test,
               remove_duplicates=not args.keep_duplicates,
               day_begins=args.day_begins,
               additional_groups_to_ignore=args.ignore_groups,
               additional_tags_to_ignore=args.ignore_tags,
               use_only_groups=args.use_only_groups,
               use_only_tags=args.use_only_tags,
               verbose=not args.silent,
               keep_filename=args.keep_filename,
               keep_AAE=args.keep_aae)


# run the command line interface only when executed as a script
if __name__ == '__main__':
    main()