-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyarxiver.py
304 lines (241 loc) · 9.88 KB
/
pyarxiver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
#!/usr/bin/env python3
# coding: latin-1
from io import StringIO
from datetime import datetime
import sys, os, signal, logging, socket
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
# ================ constants
# Project metadata; name, version, description and page are printed in the
# start-up banner.
PROJ = {
    'name': 'pyarxiver',
    'version': '1.01',
    'author': 'compy',
    'page': 'https://github.com/com-py/',
    'license': 'https://creativecommons.org/licenses/by-nc-sa/4.0/',
    'desc': 'Python Youtube Arxiver - record from any point of live streams',
}

# Short resolution label -> WIDTHxHEIGHT string as matched against the
# RESOLUTION attribute of the master playlist.
vid_fmts = {
    '144p': '256x144',
    '240p': '426x240',
    '360p': '640x360',
    '480p': '854x480',
    '720p': '1280x720',
    '1080p': '1920x1080',
}

vidfmt = vid_fmts['720p']       # resolution used when none is given
retries = 2                     # attempts per download before giving up
fragdir = 'fragsdir'            # base name of the output folder
prefix = 'f720p.frag'           # file-name prefix of each saved fragment

# ANSI escape sequences used to highlight console text.
yellow = "\033[33m"
coloff = "\033[0m"

# Request headers sent when download_data() is called with header=True.
hdrs = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'}
ytube = 'https://www.youtube.com/watch?v='
# ================ define functions
"""
quit, highlight, terminate, download_data, get_m3u8, parser, get_seq, cal_time_back
"""
def quit(text):
    """Log *text* as a highlighted error message, then exit via sys.exit()."""
    emphasized = highlight(text)
    msg.error(emphasized)
    sys.exit()
def highlight(text):
    """Return *text* wrapped in the `yellow` / `coloff` escape sequences."""
    return ''.join((yellow, text, coloff))
def terminate(signal, frame):
    """Ctrl-C handler: report how many fragments were saved, then quit.

    Both arguments are ignored; they exist only to satisfy the signal-handler
    calling convention.
    """
    summary = 'total frags completed: {}'.format(count)
    quit(summary)
def download_data(url, header=False):
    """Download *url* and return ``(data, success)``.

    Parameters:
        url:    address to fetch.
        header: when true, send the module-level `hdrs` request headers.

    Returns:
        ``(bytes, True)`` on success, ``(b'', False)`` when the URL cannot be
        turned into a request or when all `retries` attempts fail.
    """
    try:
        req = Request(url, headers=hdrs if header else {})
    except ValueError:                  # Request rejects malformed URLs
        msg.info('unknown request: {}'.format(url))
        return b'', False

    # `count` is only created when the download loop starts, but this function
    # is also used during set-up; fall back to 0 so that logging a failure
    # there does not raise NameError.
    frag_no = globals().get('count', 0)

    # NOTE(review): no timeout is passed to urlopen, so the socket.timeout
    # branch only fires if a default socket timeout is configured elsewhere.
    for attempt in range(1, retries + 1):
        try:
            with urlopen(req) as resp:  # close the connection after reading
                data = resp.read()
        except socket.timeout as error:             # session likely expired
            if attempt < retries:
                msg.info('timeout error ... {}, frag {} '.format(error, frag_no))
        except (HTTPError, URLError) as error:      # session likely expired
            if attempt < retries:
                msg.info('http error ... {}, frag {} '.format(error, frag_no))
        else:                                       # successful
            return data, True
    return b'', False
def get_m3u8(data):
    """Return the first ``https...index.m3u8`` link found in *data*.

    Each line is searched for the ``index.m3u8`` marker; the link starts at
    the last ``https`` that precedes the marker on that line.  A marker with
    no preceding ``https`` is skipped rather than returned as a bogus link.

    Returns:
        The link as a string, or ``''`` when none is found.
    """
    beg = 'https'
    end = 'index.m3u8'
    with StringIO(data) as f:
        for eachline in f:
            line = eachline.strip()
            pos2 = line.find(end)
            if pos2 <= 0:               # marker absent (or nothing before it)
                continue
            pos1 = line.rfind(beg, 0, pos2)
            if pos1 < 0:                # marker present but no url start
                continue
            return line[pos1:pos2] + end
    return ''
def parser(data):
    """Parse a master m3u8 playlist into ``[(resolution, url), ...]``.

    A line containing ``RESOLUTION=`` sets the resolution that is paired with
    the next line containing an ``index.m3u8`` link.  The resolution value
    runs up to the next comma, or to the end of the line when it is the last
    attribute.

    Returns:
        A list of ``(resolution, url)`` tuples in playlist order.
    """
    mark1 = 'RESOLUTION='
    mark2 = 'index.m3u8'
    formats, resolution = [], ''
    with StringIO(data) as f:
        for eachline in f:
            line = eachline.strip()
            pos1 = line.find(mark1)
            if pos1 >= 0:
                value = line[pos1 + len(mark1):]
                # split() copes with a missing trailing comma, where the
                # find()-based slice produced an empty string
                resolution = value.split(',', 1)[0]
            elif line.find(mark2) > 0:
                formats.append((resolution, get_m3u8(line)))
    return formats
def get_seq(htmdata):
    """Extract the fragment base url, sequence number and fragment duration.

    The first line holding ``https...`` followed by ``/sq/<number>`` is used.

    Returns:
        ``(url, curr_frag, frag_dur)`` where *url* ends with ``/sq/``,
        *curr_frag* is the sequence number as a string and *frag_dur* is the
        duration in whole seconds (at least 1).  When no such line exists the
        defaults ``('', 0, 5)`` are returned.
    """
    frag_dur = 5                    # default, just in case
    curr_frag = 0
    beg, end, dur = 'https', '/sq/', '/dur/'
    url = ''
    with StringIO(htmdata) as file:
        for eachline in file:
            line = eachline.strip()
            pos2 = line.find(end)               # look for '/sq/'
            if pos2 <= 0:
                continue
            pos1 = line.rfind(beg, 0, pos2)     # url must start before '/sq/'
            if pos1 < 0:
                continue
            j = pos2 + len(end)                 # end of url
            seq = line[j:].split('/', 1)[0]     # up to next '/' or end of line
            if not seq.isdecimal():             # callers convert it with int()
                continue
            url, curr_frag = line[pos1:j], seq
            pos3 = line.find(dur, j + len(seq))
            if pos3 >= 0:
                durstr = line[pos3 + len(dur):].split('/', 1)[0]
                # accept plain decimals only (digits with at most one dot)
                if durstr.replace('.', '', 1).isdecimal():
                    # never report 0: callers divide by the duration
                    frag_dur = max(1, round(float(durstr)))
            break
    return url, curr_frag, frag_dur
# calculate frag number from time back, timeback format=days:hrs:min
def cal_time_back(timeback, frag_dur):
    """Convert a ``[day:]hr:min`` string into a number of fragments.

    Parameters:
        timeback: text such as ``'2:30'`` or ``'1:0:15'``; fields are limited
                  to 5 days, 23 hours and 59 minutes.
        frag_dur: fragment duration in seconds.

    Returns:
        ``(frags_back, start_time)`` where *start_time* is the local start
        time formatted as ``YYYY-MM-DD HH:MM:SS``, or ``(0, None)`` when
        *timeback* is not valid.  The total is capped at 5 days.
    """
    factor = (60, 3600, 24*3600)    # secs in one min, hr, day
    maxval = (59, 23, 5)            # maximum values in each field
    items = timeback.split(':')
    items.reverse()                 # reverse order in case day is missing
    # need at least hr:min, and no more fields than day:hr:min
    if not 2 <= len(items) <= len(factor):
        return 0, None

    seconds = 0
    for item, unit, limit in zip(items, factor, maxval):
        # isdecimal() only accepts characters that int() can convert
        if not item.isdecimal() or int(item) > limit:
            return 0, None
        seconds += unit * int(item)

    seconds = min(seconds, factor[2]*5)     # max time back not to exceed 5 days
    time_back = datetime.fromtimestamp(datetime.now().timestamp() - seconds)
    # drop the fractional second explicitly; slicing the string breaks when
    # the microsecond field happens to be zero
    return seconds//frag_dur, str(time_back.replace(microsecond=0))
# ================ config logger console output
msg = logging.getLogger(PROJ['name'])
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    datefmt='%Y.%m.%d %H:%M:%S')
# "color" is a Windows console command, used here to enable color output;
# other shells have no such command and would only print "command not found"
if os.name == 'nt':
    os.system("color")
# start-up banner: name, version, description, then the project page
msg.info(PROJ['name'] + ' (v' + PROJ['version'] + ') - ' + PROJ['desc'])
msg.info(PROJ['page'] + PROJ['name'] + '\n')
# ================ process arguments
# Expected: video-link [time-back] [resolution]; the link must come first,
# the two optional arguments may follow in either order.
n = len(sys.argv)
if n < 2 or n > 4:                      # too few or too many args
    why = 'no video link' if n < 2 else 'too many parameters'
    quit(why + '\nUsage: ' + sys.argv[0] +
         ' video-link [time-back ([day:]hr:min)]' + ' [resolution (480p or 854x480 ...)]\n'+
         'e.g. download vid-id from now at 720p(default): ' +
         sys.argv[0] + ' ' + ytube + 'vid-id\n' +
         'or download vid-id from 2hr.30min ago at 1080p: ' +
         sys.argv[0] + ' ' + ytube + 'vid-id 2:30 1080p')

vidurl, time_para = '', ''
for arg in sys.argv[1:]:
    if vidurl == '':
        vidurl = arg
        msg.info('video link: {}'.format(vidurl))
    # endswith() is safe for an empty argument, unlike arg[-1]
    elif arg.endswith('p') or arg.find('x') > 0:    # format like 720p, 854x480
        # standard labels map to WIDTHxHEIGHT; anything else is used as given
        vidfmt = vid_fmts.get(arg, arg)
        msg.info('chosen resolution={}'.format(vidfmt))
        prefix = 'f' + arg + '.frag'
    elif time_para == '':                           # store as time back
        time_para = arg
    else:
        msg.info('unknown parameter ignored - {}'.format(arg))
# ================ get response and top level m3u8 link
response, success = download_data(vidurl, True)
if not success:
    # the message previously had no placeholder, so the link was never shown
    quit('No data read, check if video link is valid: {}'.format(vidurl))
# tolerate stray non-UTF-8 bytes in the page instead of crashing on decode
url = get_m3u8(response.decode("utf-8", errors="replace"))  # get master m3u8 url
if len(url) == 0:
    quit('No stream info found, check if video link is a LIVE stream.')
# ================ get all m3u8 formats
data, success = download_data(url, True)
formats = parser(data.decode("utf-8"))

# pick the playlist url whose resolution matches the requested one; when a
# resolution is listed more than once the last entry wins
fmturl = ''
for fmt in formats:
    if fmt[0] == vidfmt:
        fmturl = fmt[1]

# list every resolution found, each preceded by a space
allfmt = ''.join(' ' + entry[0] for entry in formats)
msg.info('available formats: {}'.format(allfmt))

if not fmturl:                          # requested resolution not offered
    quit('No standard video format found, choose an available one above')
# ================ get actual video frags link
data, success = download_data(fmturl, True)
# base url of the fragments, current sequence number, fragment duration
base, curr_frag, frag_dur = get_seq(data.decode("utf-8"))
if not base:
    quit('no stream base url found')
# ================ calc time back if present in args
if time_para == '':
    frag_back = 0
    msg.info('no time back, start at current time')
else:
    frag_back, timeback = cal_time_back(time_para, frag_dur)
    if frag_back > 0:
        msg.info('time back to {}, frags back={}, at {} sec/fragment'.format(timeback,
                 frag_back, frag_dur))
    else:
        msg.info('invalid time back para ignored - {}, start at current time'.format(time_para))

# first fragment to download; never go below sequence number 0
frag = max(int(curr_frag) - frag_back, 0)
# ================ create new frag dir if necessary
# Try fragsdir, fragsdir1, fragsdir2, ... until a name is either unused or an
# existing empty directory.
n, newdir = 0, fragdir
while os.path.exists(newdir):
    reusable = os.path.isdir(newdir) and not os.listdir(newdir)
    if reusable:                        # existing but empty: ok to use
        break
    n += 1
    newdir = fragdir + str(n)

if not os.path.exists(newdir):          # must have 'write' permission
    os.makedirs(newdir)

msg.info('downloading resolution {}, to folder {}'.format(vidfmt, newdir))
msg.info('Ctrl-C to stop downloading')
# ================ start downloading
size = 0                                # total bytes written
count = 0                               # fragments saved so far
# update status every 60 secs of video; clamp to 1 so that a fragment
# duration above 60 (or of 0) cannot make the modulo below divide by zero
counter = max(1, 60 // max(1, frag_dur))
signal.signal(signal.SIGINT, terminate)     # register Ctrl-C handler
while True:
    url = base + repr(frag)
    vidname = prefix + repr(frag) + '.ts'
    vidpath = os.path.join(newdir, vidname)
    vdata, success = download_data(url)
    if success:
        with open(vidpath, 'wb') as file:
            file.write(vdata)
        size += len(vdata)
        count += 1
        if count % counter == 0:            # update status
            vidlen = frag_dur*count
            hr, mn = vidlen//3600, (vidlen%3600)//60
            print('frags done: {}, size: {} (mb), time (hr:min) = {}:{}'.format(count, size//1000000, hr, ('0'+repr(mn))[-2:]), end='\r')
    else:
        # a fragment that still fails after the retries ends the run
        terminate(count, count)             # filler args
    frag += 1