-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcaptions.py
More file actions
49 lines (31 loc) · 1.26 KB
/
captions.py
File metadata and controls
49 lines (31 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from youtube_transcript_api import YouTubeTranscriptApi
from main import video_id
import pandas as pd
from main import path
import time
from random import randint
def get_caption(video_id):
    """Return the auto-generated subtitle of one YouTube video as German text.

    The auto-generated transcript is looked up with the language preference
    ``['de', 'en']``. A German transcript is fetched as-is; a transcript in any
    other language is translated to German before fetching. The text of all
    snippets is concatenated, each followed by a single space.

    Args:
        video_id: ID of the YouTube video to fetch the subtitle for.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``'video_id'`` and
        ``'subtitle'``.

    Raises:
        Errors raised by the ``youtube_transcript_api`` calls are not caught
        here and propagate to the caller.
    """
    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id=video_id)
    transcript = transcript_list.find_generated_transcript(['de', 'en'])

    # Decide by language code. The previous code compared the transcript
    # object against its display string (never equal) and then tested a
    # non-empty string literal (always true), so even German transcripts were
    # sent through translate('de').
    if transcript.language_code != 'de':
        transcript = transcript.translate('de')

    result = transcript.fetch()
    # Keep the trailing space after every snippet, as before.
    text = ''.join(snippet['text'] + ' ' for snippet in result)

    df = pd.DataFrame(
        [{'video_id': video_id, 'subtitle': text}],
        columns=['video_id', 'subtitle'],
    )
    time.sleep(randint(15, 20))  # short break after request
    return df
# GET SUBTITLE OF MULTIPLE VIDEOS
# Fetch the subtitle of every video and write all rows to one CSV file.
#
# The loop used to iterate over `video_ids`, a name that is never defined in
# this file (only `video_id` is imported from main), so the script stopped with
# a NameError before the first request.
# NOTE(review): assumes main.video_id is either a single ID string or an
# iterable of ID strings - confirm against main.py.
video_ids = [video_id] if isinstance(video_id, str) else list(video_id)
# Example: video_ids = ['IYN37vFn09c', 'R277Tc35Y4A', '-E-Qe8jdbbQ']

frames = []
for i in video_ids:
    df = get_caption(video_id=i)
    frames.append(df)

# Concatenate once at the end; an empty ID list still yields an empty frame.
big = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
big.to_csv((path + 'XXX5.csv'), encoding='utf-8-sig')