Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XSubs provider #945

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
5 changes: 5 additions & 0 deletions docs/api/providers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,8 @@ TVsubtitles
-----------
.. automodule:: subliminal.providers.tvsubtitles
:private-members:

XSubs
-----------
.. automodule:: subliminal.providers.xsubs
:private-members:
1 change: 1 addition & 0 deletions docs/user/how_it_works.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ subtitles. Current supported providers are:
* Shooter
* TheSubDB
* TvSubtitles
* XSubs

Providers all inherit the same :class:`~subliminal.providers.Provider` base class and thus share the same API.
They are registered on the ``subliminal.providers`` entry point and are exposed through the
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ def find_version(*file_paths):
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
'shooter = subliminal.providers.shooter:ShooterProvider',
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider'
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider',
'xsubs = subliminal.providers.xsubs:XSubsProvider'
],
'subliminal.refiners': [
'metadata = subliminal.refiners.metadata:refine',
Expand Down
5 changes: 4 additions & 1 deletion subliminal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,13 +241,14 @@ def convert(self, value, param, ctx):
@click.option('--legendastv', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='LegendasTV configuration.')
@click.option('--opensubtitles', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD',
help='OpenSubtitles configuration.')
@click.option('--xsubs', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='XSubs configuration.')
@click.option('--omdb', type=click.STRING, nargs=1, metavar='APIKEY', help='OMDB API key.')
@click.option('--cache-dir', type=click.Path(writable=True, file_okay=False), default=dirs.user_cache_dir,
show_default=True, expose_value=True, help='Path to the cache directory.')
@click.option('--debug', is_flag=True, help='Print useful information for debugging subliminal and for reporting bugs.')
@click.version_option(__version__)
@click.pass_context
def subliminal(ctx, addic7ed, legendastv, opensubtitles, omdb, cache_dir, debug):
def subliminal(ctx, addic7ed, legendastv, opensubtitles, xsubs, omdb, cache_dir, debug):
"""Subtitles, faster than your thoughts."""
# create cache directory
try:
Expand Down Expand Up @@ -280,6 +281,8 @@ def subliminal(ctx, addic7ed, legendastv, opensubtitles, omdb, cache_dir, debug)
if opensubtitles:
ctx.obj['provider_configs']['opensubtitles'] = {'username': opensubtitles[0], 'password': opensubtitles[1]}
ctx.obj['provider_configs']['opensubtitlesvip'] = {'username': opensubtitles[0], 'password': opensubtitles[1]}
if xsubs:
ctx.obj['provider_configs']['xsubs'] = {'username': xsubs[0], 'password': xsubs[1]}

# refiner configs
if omdb:
Expand Down
3 changes: 2 additions & 1 deletion subliminal/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def unregister(self, entry_point):
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
'shooter = subliminal.providers.shooter:ShooterProvider',
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider'
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider',
'xsubs = subliminal.providers.xsubs:XSubsProvider'
])

#: Disabled providers
Expand Down
291 changes: 291 additions & 0 deletions subliminal/providers/xsubs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
# -*- coding: utf-8 -*-
import logging
import re

from babelfish import Language
from guessit import guessit
from requests import Session

from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError
from ..matches import guess_matches
from ..subtitle import Subtitle, fix_line_ending
from ..utils import sanitize
from ..video import Episode

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Matches a leading 1-3 letter word followed by a space and the rest of the name;
# group 1 is that short word, group 2 the remainder. Used by `_get_alternative_name`
# to build a variant of a series name with the short word moved to the end.
article_re = re.compile(r'^([A-Za-z]{1,3}) (.*)$')
# Matches an episode number optionally followed by `-<number>` parts (e.g. `5` or `5-6`);
# group 1 is the first number, group 3 the number of the last `-<number>` part (if any).
episode_re = re.compile(r'^(\d+)(-(\d+))*$')


class XSubsSubtitle(Subtitle):
    """Subtitle entry listed by the XSubs provider.

    Carries the episode metadata read from the provider next to the link the
    subtitle file is downloaded from.
    """
    provider_name = 'xsubs'

    def __init__(self, language, page_link, series, season, episode, year, title, version, download_link):
        super(XSubsSubtitle, self).__init__(language, page_link=page_link)

        # where the subtitle comes from
        self.download_link = download_link
        self.version = version

        # what the subtitle is for
        self.series = series
        self.year = year
        self.season = season
        self.episode = episode
        self.title = title

        # fixed for every subtitle of this provider: hearing impaired status is
        # left unset and the content encoding is forced to windows-1253
        self.hearing_impaired = None
        self.encoding = 'windows-1253'

    @property
    def id(self):
        """Identifier of the subtitle: its download link."""
        return self.download_link

    @property
    def info(self):
        """Short description: the version when set, the download link otherwise."""
        return self.version or self.download_link

    def get_matches(self, video):
        """Return the set of properties of `video` matched by this subtitle.

        The episode metadata is matched first, then whatever guessit can
        extract from the version string is matched as a partial guess.
        """
        metadata_guess = {
            'title': self.series,
            'season': self.season,
            'episode': self.episode,
            'episode_title': self.title,
            'year': self.year,
            'release_group': self.version
        }
        metadata_matches = guess_matches(video, metadata_guess)

        # other properties
        version_guess = guessit(self.version, {'type': 'episode'})
        version_matches = guess_matches(video, version_guess, partial=True)

        return metadata_matches | version_matches


class XSubsProvider(Provider):
    """XSubs Provider.

    Lists and downloads Greek (``el``) episode subtitles from the XML endpoints
    of ``server_url``. Credentials are optional; when given, the provider signs
    in on :meth:`initialize` and signs out on :meth:`terminate`.
    """
    languages = {Language.fromalpha2(lang) for lang in ['el']}
    video_types = (Episode,)
    server_url = 'http://xsubs.tv'
    sign_in_url = '/xforum/account/signin/'
    sign_out_url = '/xforum/account/signout/'
    all_series_url = '/series/all.xml'
    series_url = '/series/{:d}/main.xml'
    season_url = '/series/{show_id:d}/{season:d}.xml'
    page_link = '/ice/xsw.xml?srsid={show_id:d}#{season_id:d};{season:d};{episode:d}'
    download_link = '/xthru/getsub/{:d}'
    subtitle_class = XSubsSubtitle

    def __init__(self, username=None, password=None):
        """Store the optional credentials.

        :raise: :class:`ConfigurationError` if only one of `username` and `password` is given.

        """
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.session = None

    def initialize(self):
        """Open the HTTP session and sign in when credentials were provided.

        :raise: :class:`AuthenticationError` if the sign-in POST is not answered with a 302 redirect.

        """
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

        # login
        if self.username and self.password:
            logger.info('Logging in')
            sign_in_url = self.server_url + self.sign_in_url

            # load the sign-in page first: the CSRF token posted below is read from the cookie it sets
            self.session.get(sign_in_url, timeout=10)
            data = {'username': self.username,
                    'password': self.password,
                    'csrfmiddlewaretoken': self.session.cookies['csrftoken']}
            r = self.session.post(sign_in_url, data, allow_redirects=False, timeout=10)

            # a successful sign-in is detected by the redirect, hence allow_redirects=False above
            if r.status_code != 302:
                raise AuthenticationError(self.username)

            logger.debug('Logged in')
            self.logged_in = True

    def terminate(self):
        """Sign out if needed and close the HTTP session.

        The session is closed even when the sign-out request fails.
        """
        try:
            # logout
            if self.logged_in:
                logger.info('Logging out')
                r = self.session.get(self.server_url + self.sign_out_url, timeout=10)
                r.raise_for_status()
                logger.debug('Logged out')
        finally:
            self.logged_in = False
            self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
    def _get_show_ids(self):
        """Get the mapping of sanitized show names to show ids from the all-series listing.

        Empty results are not cached (see `should_cache_fn`).

        :return: show id per sanitized series name, empty if the provider returned no data.
        :rtype: dict

        """
        # get the shows page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + self.all_series_url, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            # must be a dict: get_show_id() calls .get() on the result
            return {}

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        for show_category in soup.findAll('seriesl'):
            # only the first category labelled u'Σειρές' is read; tags without a category are skipped
            if show_category.attrs.get('category') == u'Σειρές':
                for show in show_category.findAll('series'):
                    show_ids[sanitize(show.text)] = int(show['srsid'])
                break
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids

    def get_show_id(self, series_names, year=None):
        """Look up the show id matching one of `series_names`.

        Each name is tried sanitized and, when it starts with a short (1-3 letter)
        word, also with that word moved to the end. For every candidate the lookup
        is attempted with the year appended, with the year in brackets, then bare.

        :param list series_names: candidate names of the series.
        :param int year: year of the series, if any.
        :return: the show id, if found.
        :rtype: int or None

        """
        series_sanitized_names = []
        for name in series_names:
            sanitized_name = sanitize(name)
            series_sanitized_names.append(sanitized_name)
            alternative_name = _get_alternative_name(sanitized_name)
            if alternative_name:
                series_sanitized_names.append(alternative_name)

        show_ids = self._get_show_ids()
        show_id = None

        for series_sanitized in series_sanitized_names:
            # attempt with year
            if year:
                logger.debug('Getting show id with year')
                show_id = show_ids.get('{series} {year:d}'.format(series=series_sanitized, year=year))

            # attempt with year in brackets
            if not show_id and year:
                logger.debug('Getting show id with year in brackets')
                show_id = show_ids.get('{series} [{year:d}]'.format(series=series_sanitized, year=year))

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show_ids.get(series_sanitized)

            if show_id:
                break

        return int(show_id) if show_id else None

    def query(self, show_id, series, season, year=None, country=None):
        """List the published subtitles of a season of a show.

        :param int show_id: id of the show on the provider.
        :param str series: series name, used only when the provider response carries no `name` tag.
        :param int season: season number to look for.
        :param int year: year stored on the returned subtitles.
        :param country: unused, kept for interface compatibility.
        :return: one subtitle per published release and episode number, empty if nothing was found.
        :rtype: list of :class:`XSubsSubtitle`

        """
        # get the season list of the show
        logger.info('Getting the season list of show id %d', show_id)
        r = self.session.get(self.server_url + self.series_url.format(show_id), timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # prefer the series name as spelled by the provider, keep the caller's one when it is missing
        name_tag = soup.find('name')
        if name_tag is not None:
            series = name_tag.text

        # loop over season rows
        season_id = None
        for season_row in soup.findAll('series_group'):
            try:
                if int(season_row['ssnnum']) == season:
                    season_id = int(season_row['ssnid'])
                    break
            except (KeyError, ValueError, TypeError):
                # row without usable season number/id
                continue

        if season_id is None:
            logger.debug('Season not found in provider')
            return []

        # get the subtitle list of the season (the season url is addressed by season id, not season number)
        logger.info('Getting the subtitle list of season %d', season)
        r = self.session.get(self.server_url + self.season_url.format(show_id=show_id, season=season_id), timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        subtitles = []
        # loop over episode rows
        for subtitle_group in soup.findAll('subg'):
            # read the episode info
            episode_info = subtitle_group.find('etitle')
            if episode_info is None:
                continue

            # first number and, for entries like `5-6`, the last number of the range
            episode_match = episode_re.match(episode_info['number'])
            if not episode_match:
                continue
            episodes = [int(e) for e in (episode_match.group(1), episode_match.group(3)) if e]

            subtitle_info = subtitle_group.find('sgt')
            if subtitle_info is None:
                continue

            # season as reported by the subtitle row itself; kept apart from the `season` argument
            subtitle_season = int(subtitle_info['ssnnum'])
            episode_id = int(subtitle_info['epsid'])
            page_link = self.server_url + self.page_link.format(show_id=show_id, season_id=season_id,
                                                                season=subtitle_season, episode=episode_id)

            for subs_tag in subtitle_group.findAll('sr'):
                # filter out unreleased subtitles
                if subs_tag['published_on'] == '':
                    continue

                title = episode_info['title']
                version = subs_tag.fmt.text + ' ' + subs_tag.team.text
                download_link = self.server_url + self.download_link.format(int(subs_tag['rlsid']))

                for episode in episodes:
                    subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, series, subtitle_season,
                                                   episode, year, title, version, download_link)
                    logger.debug('Found subtitle %r', subtitle)
                    subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        """List the subtitles matching the season and episode of `video` in one of `languages`.

        :return: the matching subtitles, empty for non-episode videos or unknown shows.
        :rtype: list of :class:`XSubsSubtitle`

        """
        if not isinstance(video, Episode):
            return []

        # lookup show_id
        titles = [video.series] + video.alternative_series
        show_id = self.get_show_id(titles, video.year)

        if not show_id:
            logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
            return []

        # query for subtitles with the show_id
        return [s for s in self.query(show_id, video.series, video.season, video.year)
                if s.language in languages and s.season == video.season and s.episode == video.episode]

    def download_subtitle(self, subtitle):
        """Download the content of `subtitle` and store it on the subtitle.

        The content is left untouched when the provider returns no data.
        """
        if isinstance(subtitle, XSubsSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link},
                                 timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug('Unable to download subtitle. No data returned from provider')
                return

            subtitle.content = fix_line_ending(r.content)


def _get_alternative_name(series):
    """Return `series` with its leading short word moved to the end.

    The leading word must be 1 to 3 letters long and followed by a space
    (see `article_re`); ``None`` is returned when `series` has no such prefix.
    """
    found = article_re.match(series)
    if found is None:
        return None

    article, remainder = found.groups()
    return '{series} {article}'.format(series=remainder, article=article)
Loading