Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Provider.hash_video staticmethod #1172

Merged
merged 2 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/1172.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Provider.hash_video staticmethod, to allow creating standalone providers.
6 changes: 6 additions & 0 deletions subliminal/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from subliminal.video import Episode, Movie, Video

if TYPE_CHECKING:
import os
from collections.abc import Sequence, Set
from http.client import HTTPSConnection
from types import TracebackType
Expand Down Expand Up @@ -136,6 +137,11 @@ class Provider(Generic[S]):
#: User Agent to use
user_agent: str = f'Subliminal/{__short_version__}'

@staticmethod
def hash_video(video_path: str | os.PathLike) -> str | None:
"""Hash the video to be used by the provider."""
return None

def __enter__(self) -> Self:
self.initialize()
return self
Expand Down
36 changes: 36 additions & 0 deletions subliminal/providers/bsplayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
from __future__ import annotations

import logging
import os
import re
import secrets
import struct
import zlib
from time import sleep
from typing import TYPE_CHECKING, ClassVar, cast, overload
Expand Down Expand Up @@ -194,6 +196,40 @@ def __init__(self, search_url: str | None = None, timeout: int = 10) -> None:
self.session = Session()
self.search_url = search_url or get_sub_domain()

@staticmethod
def hash_video(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using BSPlayer's algorithm.

:param str video_path: path of the video.
:return: the hash.
:rtype: str.
"""
little_endian_long_long = '<q' # little-endian long long
byte_size = struct.calcsize(little_endian_long_long)

with open(video_path, 'rb') as f:
file_size = os.path.getsize(video_path)
file_hash = file_size

if file_size < 65536 * 2:
return None

for _ in range(65536 // byte_size):
buff = f.read(byte_size)
(l_value,) = struct.unpack(little_endian_long_long, buff)
file_hash += l_value
file_hash &= 0xFFFFFFFFFFFFFFFF # to remain as 64bit number

f.seek(max(0, file_size - 65536), 0)

for _ in range(65536 // byte_size):
buff = f.read(byte_size)
(l_value,) = struct.unpack(little_endian_long_long, buff)
file_hash += l_value
file_hash &= 0xFFFFFFFFFFFFFFFF

return f'{file_hash:016x}'

def _api_request(self, func_name: str = 'logIn', params: str = '', tries: int = 5) -> Element:
"""Request data from search url.

Expand Down
16 changes: 16 additions & 0 deletions subliminal/providers/napiprojekt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import hashlib
import io
import logging
from gzip import BadGzipFile, GzipFile
Expand All @@ -16,6 +17,7 @@
from . import Provider

if TYPE_CHECKING:
import os
from collections.abc import Set

from subliminal.video import Video
Expand Down Expand Up @@ -96,6 +98,20 @@ def __init__(self, *, timeout: int = 10) -> None:
self.timeout = timeout
self.session = None

@staticmethod
def hash_video(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using NapiProjekt's algorithm.

:param str video_path: path of the video.
:return: the hash.
:rtype: str

"""
readsize = 1024 * 1024 * 10
with open(video_path, 'rb') as f:
data = f.read(readsize)
return hashlib.md5(data).hexdigest() # noqa: S324

def initialize(self) -> None:
"""Initialize the provider."""
self.session = Session()
Expand Down
105 changes: 8 additions & 97 deletions subliminal/refiners/hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from __future__ import annotations

import hashlib
import logging
import os
import struct
Expand All @@ -24,40 +23,6 @@
logger = logging.getLogger(__name__)


def hash_bsplayer(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using BSPlayer's algorithm.

:param str video_path: path of the video.
:return: the hash.
:rtype: str.
"""
little_endian_long_long = '<q' # little-endian long long
byte_size = struct.calcsize(little_endian_long_long)

with open(video_path, 'rb') as f:
file_size = os.path.getsize(video_path)
file_hash = file_size

if file_size < 65536 * 2:
return None

for _ in range(65536 // byte_size):
buff = f.read(byte_size)
(l_value,) = struct.unpack(little_endian_long_long, buff)
file_hash += l_value
file_hash &= 0xFFFFFFFFFFFFFFFF # to remain as 64bit number

f.seek(max(0, file_size - 65536), 0)

for _ in range(65536 // byte_size):
buff = f.read(byte_size)
(l_value,) = struct.unpack(little_endian_long_long, buff)
file_hash += l_value
file_hash &= 0xFFFFFFFFFFFFFFFF

return f'{file_hash:016x}'


def hash_opensubtitles(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using OpenSubtitles' algorithm.

Expand Down Expand Up @@ -87,69 +52,11 @@ def hash_opensubtitles(video_path: str | os.PathLike) -> str | None:
return f'{filehash:016x}'


def hash_thesubdb(video_path: str | os.PathLike) -> str | None: # pragma: no cover
"""Compute a hash using TheSubDB's algorithm.

:param str video_path: path of the video.
:return: the hash.
:rtype: str

"""
readsize = 64 * 1024
if os.path.getsize(video_path) < readsize:
return None
with open(video_path, 'rb') as f:
data = f.read(readsize)
f.seek(-readsize, os.SEEK_END)
data += f.read(readsize)

return hashlib.md5(data).hexdigest() # noqa: S324


def hash_napiprojekt(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using NapiProjekt's algorithm.

:param str video_path: path of the video.
:return: the hash.
:rtype: str

"""
readsize = 1024 * 1024 * 10
with open(video_path, 'rb') as f:
data = f.read(readsize)
return hashlib.md5(data).hexdigest() # noqa: S324


def hash_shooter(video_path: str | os.PathLike) -> str | None: # pragma: no cover
"""Compute a hash using Shooter's algorithm.

:param string video_path: path of the video
:return: the hash
:rtype: string

"""
filesize = os.path.getsize(video_path)
readsize = 4096
if os.path.getsize(video_path) < readsize * 2:
return None
offsets = (readsize, filesize // 3 * 2, filesize // 3, filesize - readsize * 2)
filehash = []
with open(video_path, 'rb') as f:
for offset in offsets:
f.seek(offset)
filehash.append(hashlib.md5(f.read(readsize)).hexdigest()) # noqa: S324
return ';'.join(filehash)


#: Hash functions keyed by provider name; several provider names may map to
#: the same function (all the OpenSubtitles variants share one).
hash_functions: dict[str, HashFunc] = {
'bsplayer': hash_bsplayer,
'napiprojekt': hash_napiprojekt,
'opensubtitles': hash_opensubtitles,
'opensubtitlesvip': hash_opensubtitles,
'opensubtitlescom': hash_opensubtitles,
'opensubtitlescomvip': hash_opensubtitles,
'shooter': hash_shooter,
'thesubdb': hash_thesubdb,
}


Expand All @@ -174,16 +81,20 @@ def refine(
logger.debug('Computing hashes for %r', video.name)
for name in providers or default_providers:
provider = cast(Provider, provider_manager[name].plugin)
if name not in hash_functions:
continue

if not provider.check_types(video):
continue

if languages is not None and not provider.check_languages(languages):
continue

h = hash_functions[name](video.name)
# Try provider static method
h = provider.hash_video(video.name)

# Try generic hashes
if h is None and name in hash_functions:
h = hash_functions[name](video.name)

# Add hash
if h is not None:
video.hashes[name] = h

Expand Down
9 changes: 9 additions & 0 deletions tests/providers/test_bsplayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@
SEARCH_URL = 'http://s1.api.bsplayer-subtitles.com/v1.php'


def test_hash_bsplayer(mkv):
    """Check the BSPlayer hash of the ``test1`` video against a known value."""
    expected = '40b44a7096b71ec3'
    computed = BSPlayerProvider.hash_video(mkv['test1'])
    assert computed == expected


def test_hash_bsplayer_too_small(tmpdir):
    """Check that no BSPlayer hash is produced for a freshly created file."""
    tiny_video = tmpdir.ensure('test_too_small.mkv')
    computed = BSPlayerProvider.hash_video(str(tiny_video))
    assert computed is None


def test_get_matches_movie_hash(episodes):
subtitle = BSPlayerSubtitle(
subtitle_id='16442520',
Expand Down
4 changes: 4 additions & 0 deletions tests/providers/test_napiprojekt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
)


def test_hash_napiprojekt(mkv):
    """Check the NapiProjekt hash of the ``test1`` video against a known value."""
    expected = '9884a2b66dcb2965d0f45ce84e37b60c'
    computed = NapiProjektProvider.hash_video(mkv['test1'])
    assert computed == expected


def test_get_matches(movies):
subtitle = NapiProjektSubtitle(Language('pol'), '6303e7ee6a835e9fcede9fb2fb00cb36')
matches = subtitle.get_matches(movies['man_of_steel'])
Expand Down
20 changes: 1 addition & 19 deletions tests/refiners/test_hash.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,4 @@
from subliminal.refiners.hash import hash_bsplayer, hash_opensubtitles, hash_thesubdb


def test_hash_bsplayer(mkv):
    """Check the BSPlayer hash of the ``test1`` video against a known value."""
    expected = '40b44a7096b71ec3'
    computed = hash_bsplayer(mkv['test1'])
    assert computed == expected


def test_hash_bsplayer_too_small(tmpdir):
    """Check that no BSPlayer hash is produced for a freshly created file."""
    tiny_video = tmpdir.ensure('test_too_small.mkv')
    computed = hash_bsplayer(str(tiny_video))
    assert computed is None
from subliminal.refiners.hash import hash_opensubtitles


def test_hash_opensubtitles(mkv):
Expand All @@ -17,12 +8,3 @@ def test_hash_opensubtitles(mkv):
def test_hash_opensubtitles_too_small(tmpdir):
    """Check that no OpenSubtitles hash is produced for a freshly created file."""
    tiny_video = tmpdir.ensure('test_too_small.mkv')
    computed = hash_opensubtitles(str(tiny_video))
    assert computed is None


def test_hash_thesubdb(mkv):
    """Check the TheSubDB hash of the ``test1`` video against a known value."""
    expected = '054e667e93e254f8fa9f9e8e6d4e73ff'
    computed = hash_thesubdb(mkv['test1'])
    assert computed == expected


def test_hash_thesubdb_too_small(tmpdir):
    """Check that no TheSubDB hash is produced for a freshly created file."""
    tiny_video = tmpdir.ensure('test_too_small.mkv')
    computed = hash_thesubdb(str(tiny_video))
    assert computed is None
Loading