Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Steps to reproduce the behavior:
### What code / cli command are you executing?
For example: I am running
```
YouTubeTranscriptApi.get_transcript ...
YouTubeTranscriptApi().fetch() ...
```

### Which Python version are you using?
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "youtube-transcript-api"
version = "1.1.1"
version = "1.2.0"
description = "This is a Python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles, supports translating subtitles and it does not require a headless browser, like other selenium based solutions do!"
readme = "README.md"
license = "MIT"
Expand Down
169 changes: 1 addition & 168 deletions youtube_transcript_api/_api.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import warnings
from typing import Optional, Iterable

from requests import Session
from requests.adapters import HTTPAdapter
from urllib3 import Retry

from .proxies import ProxyConfig, GenericProxyConfig
from .proxies import ProxyConfig

from ._transcripts import TranscriptListFetcher, FetchedTranscript, TranscriptList

Expand Down Expand Up @@ -126,169 +125,3 @@ def list(
Make sure that this is the actual ID, NOT the full URL to the video!
"""
return self._fetcher.fetch(video_id)

@classmethod
def list_transcripts(cls, video_id, proxies=None):
    """
    DEPRECATED: use the `list` method instead!

    Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object
    which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating
    over the `TranscriptList` the individual transcripts are represented by `Transcript` objects, which provide
    metadata and can either be fetched by calling `transcript.fetch()` or translated by calling
    `transcript.translate('en')`. Example:

        # retrieve the available transcripts
        transcript_list = YouTubeTranscriptApi.list_transcripts('video_id')

        # iterate over all available transcripts
        for transcript in transcript_list:
            # the Transcript object provides metadata properties
            print(
                transcript.video_id,
                transcript.language,
                transcript.language_code,
                # whether it has been manually created or generated by YouTube
                transcript.is_generated,
                # a list of languages the transcript can be translated to
                transcript.translation_languages,
            )

            # fetch the actual transcript data
            print(transcript.fetch())

            # translating the transcript will return another transcript object
            print(transcript.translate('en').fetch())

        # you can also directly filter for the language you are looking for, using the transcript list
        transcript = transcript_list.find_transcript(['de', 'en'])

        # or just filter for manually created transcripts
        transcript = transcript_list.find_manually_created_transcript(['de', 'en'])

        # or automatically generated ones
        transcript = transcript_list.find_generated_transcript(['de', 'en'])

    :param video_id: the youtube video id
    :type video_id: str
    :param proxies: a dictionary mapping of http and https proxies to be used for the network requests; a
        `ProxyConfig` instance is also accepted and is used as-is
    :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
    :return: the list of available transcripts
    :rtype: TranscriptList
    """
    # stacklevel=2 attributes the warning to the caller's line instead of
    # this library frame, so users can locate the deprecated call site.
    warnings.warn(
        "`list_transcripts` is deprecated and will be removed in a future version. "
        "Use the `list` method instead!",
        DeprecationWarning,
        stacklevel=2,
    )

    proxy_config = None
    if proxies:
        if isinstance(proxies, ProxyConfig):
            # Already a proxy configuration object: pass it through untouched.
            proxy_config = proxies
        else:
            # Legacy requests-style dict: translate it into a generic proxy config.
            proxy_config = GenericProxyConfig(
                http_url=proxies.get("http"), https_url=proxies.get("https")
            )

    ytt_api = YouTubeTranscriptApi(
        proxy_config=proxy_config,
    )
    return ytt_api.list(video_id)

@classmethod
def get_transcripts(
    cls,
    video_ids,
    languages=("en",),
    continue_after_error=False,
    proxies=None,
    preserve_formatting=False,
):
    """
    DEPRECATED: use the `fetch` method instead!

    Retrieves the transcripts for a list of videos.

    :param video_ids: a list of youtube video ids
    :type video_ids: list[str]
    :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en']
        it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to
        do so.
    :type languages: list[str]
    :param continue_after_error: if this is set the execution won't be stopped, if an error occurs while retrieving
        one of the video transcripts
    :type continue_after_error: bool
    :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
    :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
    :param preserve_formatting: whether to keep select HTML text formatting
    :type preserve_formatting: bool
    :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
        video ids, which could not be retrieved
    :rtype: ({str: [{'text': str, 'start': float, 'duration': float}]}, [str])
    """
    # stacklevel=2 attributes the warning to the caller's line instead of
    # this library frame, so users can locate the deprecated call site.
    warnings.warn(
        "`get_transcripts` is deprecated and will be removed in a future version. "
        "Use the `fetch` method instead!",
        DeprecationWarning,
        stacklevel=2,
    )

    assert isinstance(video_ids, list), "`video_ids` must be a list of strings"

    data = {}
    unretrievable_videos = []

    for video_id in video_ids:
        try:
            data[video_id] = cls.get_transcript(
                video_id, languages, proxies, preserve_formatting
            )
        except Exception:
            if not continue_after_error:
                # Bare raise re-raises the active exception with its
                # original traceback intact.
                raise

            # Best-effort mode: remember the failing id and keep going.
            unretrievable_videos.append(video_id)

    return data, unretrievable_videos

@classmethod
def get_transcript(
    cls,
    video_id,
    languages=("en",),
    proxies=None,
    preserve_formatting=False,
):
    """
    DEPRECATED: use the `fetch` method instead!

    Retrieves the transcript for a single video. This is just a shortcut for calling::

        YouTubeTranscriptApi.list_transcripts(video_id, proxies).find_transcript(languages).fetch()

    :param video_id: the youtube video id
    :type video_id: str
    :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en']
        it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to
        do so.
    :type languages: list[str]
    :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
    :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
    :param preserve_formatting: whether to keep select HTML text formatting
    :type preserve_formatting: bool
    :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
    :rtype: [{'text': str, 'start': float, 'duration': float}]
    """
    # stacklevel=2 attributes the warning to the caller's line instead of
    # this library frame, so users can locate the deprecated call site.
    warnings.warn(
        "`get_transcript` is deprecated and will be removed in a future version. "
        "Use the `fetch` method instead!",
        DeprecationWarning,
        stacklevel=2,
    )

    assert isinstance(video_id, str), "`video_id` must be a string"

    # List the transcripts, pick the first matching language, fetch it and
    # convert the result to plain Python data.
    transcript_list = cls.list_transcripts(video_id, proxies)
    transcript = transcript_list.find_transcript(languages)
    fetched = transcript.fetch(preserve_formatting=preserve_formatting)
    return fetched.to_raw_data()
4 changes: 2 additions & 2 deletions youtube_transcript_api/_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ class VideoUnavailable(CouldNotRetrieveTranscript):
class InvalidVideoId(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = (
"You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n"
'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n'
'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`'
'Do NOT run: `YouTubeTranscriptApi().fetch("https://www.youtube.com/watch?v=1234")`\n'
'Instead run: `YouTubeTranscriptApi().fetch("1234")`'
)


Expand Down
Loading