-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcrx-extract
executable file
·137 lines (124 loc) · 4.59 KB
/
crx-extract
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python3.7
#
# Copyright (C) 2017-2018 The University of Sheffield, UK
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Tool for extracting crx file from a tar archive."""
import os
import sys
import glob
import getopt
import tarfile
import datetime
import dateutil
import dateutil.parser
from ExtensionCrawler.archive import last_crx, get_local_archive_dir
from ExtensionCrawler.config import const_basedir
def helpmsg():
    """Print the command-line usage summary to standard output.

    One line is printed per supported option, preceded by a synopsis line.
    """
    usage_lines = (
        "crx-extract [OPTION] extid",
        " -h print this help text",
        " -s silent (no log messages)",
        " -e use etag instead of date in output",
        " -w avoid ':' in filenames (useful on Windows)",
        " -d=<DATE> date",
        " -o=<DIR> output directory",
        " -a=<DIR> archive directory",
    )
    print("\n".join(usage_lines))
def get_tarinfo(members, name, winfs=False, etag=None):
    """Yield the entries of *members* whose ``name`` equals *name*.

    Each matching entry is renamed in place before it is yielded, so that
    the caller extracts it under the adjusted path:

    * with *winfs* set, every ``:`` in the name becomes ``-``;
    * with *etag* given, the directory component directly above the file
      is replaced by *etag*.

    Entries that do not match are skipped and left unmodified.
    """
    for member in members:
        if member.name != name:
            continue
        if winfs:
            member.name = name.replace(":", "-")
        if etag is not None:
            parent_dir, crx_file = os.path.split(member.name)
            # Drop the last directory level and put the etag in its place.
            grandparent_dir = os.path.dirname(parent_dir)
            member.name = os.path.join(grandparent_dir, etag, crx_file)
        yield member
def _extract_crx(tar, mode, output, last, winfs, etag):
    """Extract the members named *last* from the archive *tar* into *output*.

    *mode* is passed to ``tarfile.open``; *winfs* and *etag* are forwarded
    to ``get_tarinfo``.  Returns the number of members that matched.

    ``TarFile.extractall`` itself always returns ``None``, so the matches
    are counted while it consumes the member generator; this keeps the
    extraction a single pass over the archive.
    """
    matched = 0

    def _counted(archive):
        nonlocal matched
        for tarinfo in get_tarinfo(archive, last, winfs, etag):
            matched += 1
            yield tarinfo

    with tarfile.open(tar, mode) as archive:
        archive.extractall(path=output, members=_counted(archive))
    return matched


def main(argv):
    """Extract a crx file of the extension given in *argv* from its archive.

    *argv* is the argument list without the program name.  Recognised
    options are ``-h``, ``-s``, ``-e``/``--etag``, ``-w``/``--winfs``,
    ``-d``/``--date``, ``-o``/``--output`` and ``-a``/``--archive``; the
    first positional argument is the extension id.

    The crx is looked up with ``last_crx`` (restricted by the date when one
    is given) and extracted from ``<extid>.tar``.  If it is not found
    there, the ``<extid>.NNN.tar.xz`` archives are tried from the highest
    number downwards until one of them contains it.

    Exits with status 2 on an invalid option, and with the default status
    after printing the help text when ``-h`` is given or no extension id
    is supplied.
    """
    basedir = const_basedir()
    verbose = True
    date = None
    useetag = False
    output = ""
    winfs = False
    try:
        # "winfs" and "etag" are registered so that the long spellings
        # checked in the loop below are actually accepted by getopt.
        opts, args = getopt.getopt(
            argv, "hsed:a:o:w",
            ["date=", "archive=", "output=", "winfs", "etag"])
    except getopt.GetoptError:
        helpmsg()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            helpmsg()
            sys.exit()
        elif opt in ("-a", "--archive"):
            basedir = arg
        elif opt in ("-d", "--date"):
            date = arg
        elif opt in ("-o", "--output"):
            output = arg
        elif opt in ("-w", "--winfs"):
            winfs = True
        elif opt in ("-e", "--etag"):
            useetag = True
        elif opt == '-s':
            verbose = False

    if not args:
        helpmsg()
        sys.exit()
    extid = args[0]

    datadir = os.path.join(basedir, "data")
    if date is not None:
        dateobj = dateutil.parser.parse(date)
        # A date without usable timezone information is treated as UTC.
        if dateobj.tzinfo is None or dateobj.tzinfo.utcoffset(dateobj) is None:
            dateobj = dateobj.replace(tzinfo=datetime.timezone.utc)
        last, etag = last_crx(datadir, extid, dateobj)
    else:
        last, etag = last_crx(datadir, extid)
    if not useetag:
        etag = None

    basetar = os.path.join(datadir, get_local_archive_dir(extid), extid)
    tar = basetar + ".tar"

    # An empty path from last_crx is reported as "no crx available".
    if last == "":
        if verbose:
            if os.path.exists(tar):
                print("CRX not in archive " + tar)
            else:
                print("CRX does not exist: cannot find archive " + tar)
        return

    if not os.path.exists(tar):
        if verbose:
            print("Cannot find archive " + tar)
        return

    if verbose:
        print("Extracting " + os.path.join(output, last) + " from " + tar)
    found = _extract_crx(tar, 'r', output, last, winfs, etag)

    # Fall back to the numbered, compressed archives (highest number
    # first) only while the crx has not been extracted yet.
    archivetars = sorted(glob.glob(basetar + ".[0-9][0-9][0-9].tar.xz"))
    while not found and archivetars:
        tar = archivetars.pop()
        if verbose:
            print("Extracting " + os.path.join(output, last) + " from " + tar)
        found = _extract_crx(tar, 'r:xz', output, last, winfs, etag)
# Script entry point: hand the command-line arguments (without the program
# name) to main() when the file is executed directly.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)