-
Notifications
You must be signed in to change notification settings - Fork 13
REVAI-3855: Update python sdk to support Super API #105
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
16f225f
c3249d5
98a877c
ec67023
3300947
19690b1
1646d96
d38ac85
a045d99
35f66e3
56b6ba1
7feea90
1a7be10
70a92b6
9131661
7e93e4e
f2c5083
82597ba
7510072
2b4a1bb
74b6321
d85bbee
33388e9
3949ca3
58b17ef
71776f0
0c87876
a9bcfc0
701e5ec
a9cc05f
fba7a79
ea8dea8
cb259c9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,9 +2,13 @@ | |
| """Speech recognition tools for using Rev AI""" | ||
|
|
||
| import json | ||
| from .models import Account, CaptionType, Job, Transcript | ||
| from .baseclient import BaseClient | ||
|
|
||
| from . import utils | ||
| from .baseclient import BaseClient | ||
| from .models import Account, CaptionType, Job, Transcript | ||
| from .models.asynchronous.summarization_options import SummarizationOptions | ||
| from .models.asynchronous.summary import Summary | ||
| from .models.asynchronous.translation_options import TranslationOptions | ||
|
|
||
| try: | ||
| from urllib.parse import urljoin | ||
|
|
@@ -66,7 +70,9 @@ def submit_job_url( | |
| notification_config=None, | ||
| skip_postprocessing=False, | ||
| remove_atmospherics=False, | ||
| speakers_count=None): | ||
| speakers_count=None, | ||
| summarization_config: SummarizationOptions = None, | ||
| translation_config: TranslationOptions = None): | ||
| """Submit media given a URL for transcription. | ||
| The audio data is downloaded from the URL | ||
| :param media_url: web location of the media file | ||
|
|
@@ -116,6 +122,8 @@ def submit_job_url( | |
| :param remove_atmospherics: Atmospherics such as <laugh>, <affirmative>, etc. will not | ||
| appear in the transcript. | ||
| :param speakers_count: Use to specify the total number of unique speakers in the audio. | ||
| :param summarization_config: Use to request transcript summary. | ||
| :param translation_config: Use to request transcript translation. | ||
| :returns: raw response data | ||
| :raises: HTTPError | ||
| """ | ||
|
|
@@ -128,7 +136,9 @@ def submit_job_url( | |
| verbatim, rush, test_mode, | ||
| segments_to_transcribe, speaker_names, | ||
| source_config, notification_config, | ||
| skip_postprocessing) | ||
| skip_postprocessing, | ||
| summarization_config=summarization_config, | ||
| translation_config=translation_config) | ||
|
|
||
| response = self._make_http_request( | ||
| "POST", | ||
|
|
@@ -161,7 +171,9 @@ def submit_job_local_file( | |
| notification_config=None, | ||
| skip_postprocessing=False, | ||
| remove_atmospherics=False, | ||
| speakers_count=None): | ||
| speakers_count=None, | ||
| summarization_config: SummarizationOptions = None, | ||
| translation_config: TranslationOptions = None): | ||
| """Submit a local file for transcription. | ||
| Note that the content type is inferred if not provided. | ||
|
|
||
|
|
@@ -208,6 +220,8 @@ def submit_job_local_file( | |
| :param remove_atmospherics: Atmospherics such as <laugh>, <affirmative>, etc. will not | ||
| appear in the transcript. | ||
| :param speakers_count: Use to specify the total number of unique speakers in the audio. | ||
| :param summarization_config: Use to request transcript summary. | ||
| :param translation_config: Use to request transcript translation. | ||
| :returns: raw response data | ||
| :raises: HTTPError, ValueError | ||
| """ | ||
|
|
@@ -222,7 +236,9 @@ def submit_job_local_file( | |
| language, custom_vocabulary_id, transcriber, | ||
| verbatim, rush, test_mode, | ||
| segments_to_transcribe, speaker_names, None, | ||
| notification_config, skip_postprocessing) | ||
| notification_config, skip_postprocessing, | ||
| summarization_config=summarization_config, | ||
| translation_config=translation_config) | ||
|
|
||
| with open(filename, 'rb') as f: | ||
| files = { | ||
|
|
@@ -451,6 +467,160 @@ def get_account(self): | |
|
|
||
| return Account.from_json(response.json()) | ||
|
|
||
| def get_transcript_summary_text(self, id_): | ||
| """Get the transcript summary of a specific job as plain text. | ||
|
|
||
| :param id_: id of job to be requested | ||
| :returns: transcript data as text | ||
| :raises: HTTPError | ||
| """ | ||
| if not id_: | ||
| raise ValueError('id_ must be provided') | ||
|
|
||
| response = self._make_http_request( | ||
| "GET", | ||
| urljoin(self.base_url, 'jobs/{}/transcript/summary'.format(id_)), | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any way to extract |
||
| headers={'Accept': 'text/plain'} | ||
| ) | ||
| return response.text | ||
|
|
||
| def get_transcript_summary_json(self, id_): | ||
| """Get the transcript summary of a specific job as json. | ||
|
|
||
| :param id_: id of job to be requested | ||
| :returns: transcript data as json | ||
| :raises: HTTPError | ||
| """ | ||
| if not id_: | ||
| raise ValueError('id_ must be provided') | ||
|
|
||
| response = self._make_http_request( | ||
| "GET", | ||
| urljoin(self.base_url, 'jobs/{}/transcript/summary'.format(id_)), | ||
| headers={'Accept': 'application/json'} | ||
| ) | ||
|
|
||
| return Summary.from_json(response.json()) | ||
|
|
||
| def get_transcript_summary_json_as_stream(self, id_): | ||
| """Get the transcript summary of a specific job as streamed json. | ||
|
|
||
| :param id_: id of job to be requested | ||
| :returns: requests.models.Response HTTP response which can be used to stream | ||
| the payload of the response | ||
| :raises: HTTPError | ||
| """ | ||
| if not id_: | ||
| raise ValueError('id_ must be provided') | ||
|
|
||
| response = self._make_http_request( | ||
| "GET", | ||
| urljoin(self.base_url, 'jobs/{}/transcript/summary'.format(id_)), | ||
| headers={'Accept': 'application/json'}, | ||
| stream=True | ||
| ) | ||
|
|
||
| return response | ||
|
|
||
| def get_translated_transcript_text(self, id_, language): | ||
| """Get the translated transcript of a specific job as plain text. | ||
|
|
||
| :param id_: id of job to be requested | ||
| :param language: requested language | ||
| :returns: transcript data as text | ||
| :raises: HTTPError | ||
| """ | ||
| if not id_: | ||
| raise ValueError('id_ must be provided') | ||
|
|
||
| response = self._make_http_request( | ||
| "GET", | ||
| urljoin(self.base_url, 'jobs/{}/transcript/translation/{}'.format(id_, language)), | ||
| headers={'Accept': 'text/plain'} | ||
| ) | ||
|
|
||
| return response.text | ||
|
|
||
| def get_translated_transcript_text_as_stream(self, id_, language): | ||
| """Get the translated transcript of a specific job as a plain text stream. | ||
|
|
||
| :param id_: id of job to be requested | ||
| :param language: requested language | ||
| :returns: requests.models.Response HTTP response which can be used to stream | ||
| the payload of the response | ||
| :raises: HTTPError | ||
| """ | ||
| if not id_: | ||
| raise ValueError('id_ must be provided') | ||
|
|
||
| response = self._make_http_request( | ||
| "GET", | ||
| urljoin(self.base_url, 'jobs/{}/transcript/translation/{}'.format(id_, language)), | ||
| headers={'Accept': 'text/plain'}, | ||
| stream=True | ||
| ) | ||
|
|
||
| return response | ||
|
|
||
| def get_translated_transcript_json(self, id_, language): | ||
| """Get the translated transcript of a specific job as json. | ||
|
|
||
| :param id_: id of job to be requested | ||
| :param language: requested language | ||
| :returns: transcript data as json | ||
| :raises: HTTPError | ||
| """ | ||
| if not id_: | ||
| raise ValueError('id_ must be provided') | ||
|
|
||
| response = self._make_http_request( | ||
| "GET", | ||
| urljoin(self.base_url, 'jobs/{}/transcript/translation/{}'.format(id_, language)), | ||
| headers={'Accept': self.rev_json_content_type} | ||
| ) | ||
|
|
||
| return response.json() | ||
|
|
||
| def get_translated_transcript_json_as_stream(self, id_, language): | ||
| """Get the translated transcript of a specific job as streamed json. | ||
|
|
||
| :param id_: id of job to be requested | ||
| :param language: requested language | ||
| :returns: requests.models.Response HTTP response which can be used to stream | ||
| the payload of the response | ||
| :raises: HTTPError | ||
| """ | ||
| if not id_: | ||
| raise ValueError('id_ must be provided') | ||
|
|
||
| response = self._make_http_request( | ||
| "GET", | ||
| urljoin(self.base_url, 'jobs/{}/transcript/translation/{}'.format(id_, language)), | ||
| headers={'Accept': self.rev_json_content_type}, | ||
| stream=True | ||
| ) | ||
|
|
||
| return response | ||
|
|
||
| def get_translated_transcript_object(self, id_, language): | ||
| """Get the translated transcript of a specific job as a python object`. | ||
|
|
||
| :param id_: id of job to be requested | ||
| :param language: requested language | ||
| :returns: transcript data as a python object | ||
| :raises: HTTPError | ||
| """ | ||
| if not id_: | ||
| raise ValueError('id_ must be provided') | ||
|
|
||
| response = self._make_http_request( | ||
| "GET", | ||
| urljoin(self.base_url, 'jobs/{}/transcript/translation/{}'.format(id_, language)), | ||
| headers={'Accept': self.rev_json_content_type} | ||
| ) | ||
|
|
||
| return Transcript.from_json(response.json()) | ||
|
|
||
| def _create_job_options_payload( | ||
| self, | ||
| media_url=None, | ||
|
|
@@ -475,7 +645,9 @@ def _create_job_options_payload( | |
| notification_config=None, | ||
| skip_postprocessing=False, | ||
| remove_atmospherics=None, | ||
| speakers_count=None): | ||
| speakers_count=None, | ||
| summarization_config: SummarizationOptions = None, | ||
| translation_config: TranslationOptions = None): | ||
| payload = {} | ||
| if media_url: | ||
| payload['media_url'] = media_url | ||
|
|
@@ -512,7 +684,7 @@ def _create_job_options_payload( | |
| if segments_to_transcribe: | ||
| payload['segments_to_transcribe'] = segments_to_transcribe | ||
| if speaker_names: | ||
| payload['speaker_names'] =\ | ||
| payload['speaker_names'] = \ | ||
| utils._process_speaker_names(speaker_names) | ||
| if source_config: | ||
| payload['source_config'] = source_config.to_dict() | ||
|
|
@@ -524,6 +696,10 @@ def _create_job_options_payload( | |
| payload['remove_atmospherics'] = remove_atmospherics | ||
| if speakers_count: | ||
| payload['speakers_count'] = speakers_count | ||
| if summarization_config: | ||
| payload['summarization_config'] = summarization_config.to_dict() | ||
| if translation_config: | ||
| payload['translation_config'] = translation_config.to_dict() | ||
| return payload | ||
|
|
||
| def _create_captions_query(self, speaker_channel): | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,7 +1,8 @@ | ||||||||||||||
| # -*- coding: utf-8 -*- | ||||||||||||||
| """Job model""" | ||||||||||||||
|
|
||||||||||||||
| from .summarization_options import Summarization | ||||||||||||||
| from .job_status import JobStatus | ||||||||||||||
| from .translation_options import Translation | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| class Job: | ||||||||||||||
|
|
@@ -28,7 +29,9 @@ def __init__( | |||||||||||||
| rush=None, | ||||||||||||||
| segments_to_transcribe=None, | ||||||||||||||
| remove_atmospherics=None, | ||||||||||||||
| speakers_count=None): | ||||||||||||||
| speakers_count=None, | ||||||||||||||
| summarization: Summarization = None, | ||||||||||||||
| translation: Translation = None): | ||||||||||||||
|
Comment on lines
+32
to
+34
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as below:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it matters less what the autoformatter produces when all the remaining code is formatted differently. At the same time, as per PEP8
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Discussed on Zoom. Agreed that the formatting of arguments with type annotations is different according to the standard. |
||||||||||||||
| """ | ||||||||||||||
| :param id_: unique id of job | ||||||||||||||
| :param created_on: date and time at which this job was started | ||||||||||||||
|
|
@@ -85,6 +88,8 @@ def __init__( | |||||||||||||
| self.segments_to_transcribe = segments_to_transcribe | ||||||||||||||
| self.remove_atmospherics = remove_atmospherics | ||||||||||||||
| self.speakers_count = speakers_count | ||||||||||||||
| self.summarization = summarization | ||||||||||||||
| self.translation = translation | ||||||||||||||
|
|
||||||||||||||
| def __eq__(self, other): | ||||||||||||||
| """Override default equality operator""" | ||||||||||||||
|
|
@@ -120,5 +125,7 @@ def from_json(cls, json): | |||||||||||||
| rush=json.get('rush'), | ||||||||||||||
| segments_to_transcribe=json.get('segments_to_transcribe'), | ||||||||||||||
| remove_atmospherics=json.get('remove_atmospherics'), | ||||||||||||||
| speakers_count=json.get('speakers_count') | ||||||||||||||
| speakers_count=json.get('speakers_count'), | ||||||||||||||
| summarization=Summarization.from_json(json.get('summarization')), | ||||||||||||||
| translation=Translation.from_json(json.get('translation')) | ||||||||||||||
| ) | ||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| from enum import Enum | ||
| """Summarization formatting options.""" | ||
|
|
||
|
|
||
| class SummarizationFormattingOptions(str, Enum): | ||
| PARAGRAPH = "paragraph" | ||
| BULLETS = "bullets" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| # -*- coding: utf-8 -*- | ||
| """Enum for Summarization Job statuses""" | ||
|
|
||
| from enum import Enum | ||
|
|
||
|
|
||
| class SummarizationJobStatus(str, Enum): | ||
| IN_PROGRESS = "in_progress" | ||
| FAILED = "failed" | ||
| COMPLETED = "completed" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| from .summarization_formatting_options import SummarizationFormattingOptions | ||
| from .summarization_job_status import SummarizationJobStatus | ||
| from ..nlp_model import NlpModel | ||
|
|
||
| """Summarization request options.""" | ||
|
||
|
|
||
|
|
||
| class SummarizationOptions: | ||
| def __init__( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Based on other files, it looks like the trailing nit, but this is public code. Also, there should be a comment |
||
| self, | ||
| prompt: str = None, | ||
| model: NlpModel = None, | ||
| formattingType: SummarizationFormattingOptions = None | ||
| ): | ||
|
||
| self.prompt = prompt | ||
| self.model = model | ||
| self.type = formattingType | ||
|
|
||
| def to_dict(self): | ||
| """Returns the raw form of the url data object as the api | ||
| expects them""" | ||
| dict_result = {} | ||
| if self.prompt: | ||
| dict_result['prompt'] = self.prompt | ||
| if self.model: | ||
| dict_result['model'] = self.model | ||
| if self.type: | ||
| dict_result['type'] = self.type | ||
|
|
||
| return dict_result | ||
|
|
||
|
|
||
| """Summarization options.""" | ||
|
||
|
|
||
|
|
||
| class Summarization(SummarizationOptions): | ||
| def __init__( | ||
| self, | ||
| prompt: str = None, | ||
| model: NlpModel = None, | ||
| formattingType: SummarizationFormattingOptions = None, | ||
| status: SummarizationJobStatus = None, | ||
| completed_on: str = None, | ||
| failure: str = None | ||
| ): | ||
| super().__init__(prompt, model, formattingType) | ||
| self.status = status | ||
| self.completed_on = completed_on | ||
| self.failure = failure | ||
|
|
||
| @classmethod | ||
| def from_json(cls, json): | ||
| if json is None: | ||
| return None | ||
|
|
||
| return cls( | ||
| json.get('prompt'), | ||
| json.get('model'), | ||
| json.get('type'), | ||
| json.get('status'), | ||
| json.get('completed_on'), | ||
| json.get('failure') | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like we are not passing these parameters to
_create_job_options_payload. @amikofalvy, do you know whether this is intentional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@dmtrrk We just haven't supported them in the SDKs yet - there's an open PR for it - #104