Skip to content

Commit 30a4b36

Browse files
committed
[YouTube] Optimize extracting auto-translated captions
Faster and ordered: captions provided by the user are at the beginning of the list, auto-translated captions are at the end
1 parent ff030ad commit 30a4b36

File tree

3 files changed

+138
-47
lines changed

3 files changed

+138
-47
lines changed

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 52 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -670,52 +670,72 @@ public List<SubtitlesStream> getSubtitles(@Nonnull final MediaFormat format)
670670
assertPageFetched();
671671

672672
// We cannot store the subtitles list because the media format may change
673-
final List<SubtitlesStream> subtitlesToReturn = new ArrayList<>();
673+
final List<SubtitlesStream> subtitles = new ArrayList<>();
674+
final List<SubtitlesStream> autoTranslatedSubtitles = new ArrayList<>();
674675
final JsonObject renderer = playerResponse.getObject("captions")
675676
.getObject("playerCaptionsTracklistRenderer");
676677
final JsonArray captionsArray = renderer.getArray("captionTracks");
677678

679+
// Generate list of languages available for auto-translations
680+
final List<String> translationLanguages;
681+
if (renderer.has("translationLanguages")) {
682+
translationLanguages = renderer.getArray("translationLanguages")
683+
.stream()
684+
.map(JsonObject.class::cast)
685+
.map(lang -> lang.getString("languageCode"))
686+
.collect(Collectors.toList());
687+
} else {
688+
translationLanguages = Collections.emptyList();
689+
}
690+
691+
// Add subtitles
678692
for (int i = 0; i < captionsArray.size(); i++) {
679693
final JsonObject caption = captionsArray.getObject(i);
680694
final String languageCode = caption.getString("languageCode");
681695
final String baseUrl = caption.getString("baseUrl");
682696
final String vssId = caption.getString("vssId");
683697

684-
if (languageCode != null && baseUrl != null && vssId != null) {
685-
final boolean isAutoGenerated = vssId.startsWith("a.");
686-
final String cleanUrl = baseUrl
687-
// Remove preexisting format if exists
688-
.replaceAll("&fmt=[^&]*", "")
689-
// Remove translation language
690-
.replaceAll("&tlang=[^&]*", "");
691-
692-
subtitlesToReturn.add(new SubtitlesStream.Builder()
693-
.setContent(cleanUrl + "&fmt=" + format.getSuffix(), true)
694-
.setMediaFormat(format)
695-
.setLanguageCode(languageCode)
696-
.setAutoGenerated(isAutoGenerated)
697-
.setAutoTranslated(false)
698-
.build());
699-
if (i == 0 && caption.getBoolean("isTranslatable")
700-
&& renderer.has("translationLanguages")) {
701-
final JsonArray languages = renderer.getArray("translationLanguages");
702-
for (int j = 0; j < languages.size(); j++) {
703-
final JsonObject lang = languages.getObject(j);
704-
final String tLanguageCode = lang.getString("languageCode");
705-
subtitlesToReturn.add(new SubtitlesStream.Builder()
706-
.setContent(cleanUrl + "&fmt=" + format.getSuffix()
707-
+ "&tlang=" + tLanguageCode, true)
708-
.setMediaFormat(format)
709-
.setLanguageCode(tLanguageCode)
710-
.setAutoGenerated(isAutoGenerated)
711-
.setAutoTranslated(true)
712-
.build());
713-
}
698+
if (languageCode == null || baseUrl == null || vssId == null) {
699+
continue;
700+
}
701+
702+
final boolean isAutoGenerated = vssId.startsWith("a.");
703+
final String cleanUrl = baseUrl
704+
// Remove preexisting format if exists
705+
.replaceAll("&fmt=[^&]*", "")
706+
// Remove translation language
707+
.replaceAll("&tlang=[^&]*", "");
708+
709+
// add base subtitles
710+
subtitles.add(new SubtitlesStream.Builder()
711+
.setContent(cleanUrl + "&fmt=" + format.getSuffix(), true)
712+
.setMediaFormat(format)
713+
.setLanguageCode(languageCode)
714+
.setAutoGenerated(isAutoGenerated)
715+
.setAutoTranslated(false)
716+
.build());
717+
718+
// add auto-translations of this subtitle if available
719+
if (caption.getBoolean("isTranslatable")) {
720+
for (final String tLanguageCode : translationLanguages) {
721+
autoTranslatedSubtitles.add(new SubtitlesStream.Builder()
722+
.setContent(cleanUrl + "&fmt=" + format.getSuffix()
723+
+ "&tlang=" + tLanguageCode, true)
724+
.setMediaFormat(format)
725+
.setLanguageCode(tLanguageCode)
726+
.setAutoGenerated(true)
727+
.setAutoTranslated(true)
728+
.setBaseLanguageCode(languageCode)
729+
.build());
714730
}
715731
}
732+
716733
}
717734

718-
return subtitlesToReturn;
735+
// add auto-translations at the end for better sorting
736+
subtitles.addAll(autoTranslatedSubtitles);
737+
738+
return subtitles;
719739
}
720740

721741
@Override

extractor/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesStream.java

Lines changed: 84 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
public final class SubtitlesStream extends Stream {
1414
private final MediaFormat format;
15+
@Nullable
16+
private final Locale baseLocale;
1517
private final Locale locale;
1618
private final boolean autoGenerated;
1719
private final boolean autoTranslated;
@@ -31,6 +33,8 @@ public static final class Builder {
3133
@Nullable
3234
private String manifestUrl;
3335
private String languageCode;
36+
@Nullable
37+
private String baseLanguageCode;
3438
// Use of the Boolean class instead of the primitive type needed for setter call check
3539
private Boolean autoGenerated;
3640
private Boolean autoTranslated;
@@ -142,6 +146,18 @@ public Builder setLanguageCode(@Nonnull final String languageCode) {
142146
return this;
143147
}
144148

149+
/**
150+
* Set the language code of the base language used to auto-translate
151+
* the {@link SubtitlesStream} to the current language code.
152+
*
153+
* @param baseLanguageCode the base language code, or {@code null} if not auto-translated
154+
* @return this {@link Builder} instance
155+
*/
156+
public Builder setBaseLanguageCode(@Nullable final String baseLanguageCode) {
157+
this.baseLanguageCode = baseLanguageCode;
158+
return this;
159+
}
160+
145161
/**
146162
* Set whether the subtitles have been auto-generated by the streaming service.
147163
*
@@ -222,26 +238,29 @@ public SubtitlesStream build() throws ParsingException {
222238
}
223239

224240
return new SubtitlesStream(id, content, isUrl, mediaFormat, deliveryMethod,
225-
languageCode, autoGenerated, autoTranslated, manifestUrl);
241+
languageCode, autoGenerated, autoTranslated, baseLanguageCode, manifestUrl);
226242
}
227243
}
228244

229245
/**
230246
* Create a new subtitles stream.
231247
*
232-
* @param id the identifier which uniquely identifies the stream, e.g. for YouTube
233-
* this would be the itag
234-
* @param content the content or the URL of the stream, depending on whether isUrl is
235-
* true
236-
* @param isUrl whether content is the URL or the actual content of e.g. a DASH
237-
* manifest
238-
* @param mediaFormat the {@link MediaFormat} used by the stream
239-
* @param deliveryMethod the {@link DeliveryMethod} of the stream
240-
* @param languageCode the language code of the stream
241-
* @param autoGenerated whether the subtitles are auto-generated by the streaming service
242-
* @param autoTranslated whether the subtitles are auto-translated by the streaming service
243-
* @param manifestUrl the URL of the manifest this stream comes from (if applicable,
244-
* otherwise null)
248+
* @param id the identifier which uniquely identifies the stream, e.g. for YouTube
249+
* this would be the itag
250+
* @param content the content or the URL of the stream, depending on whether isUrl is
251+
* true
252+
* @param isUrl whether content is the URL or the actual content of e.g. a DASH
253+
* manifest
254+
* @param mediaFormat the {@link MediaFormat} used by the stream
255+
* @param deliveryMethod the {@link DeliveryMethod} of the stream
256+
* @param languageCode the language code of the stream
257+
* @param autoGenerated whether the subtitles are auto-generated by the streaming service
258+
* @param autoTranslated whether the subtitles are auto-translated by the streaming service
259+
* @param baseLanguageCode the language code of the base language used to translate
260+
* the subtitles to the current language
261+
* or null if the subtitles are not auto-translated
262+
* @param manifestUrl the URL of the manifest this stream comes from (if applicable,
263+
* otherwise null)
245264
*/
246265
@SuppressWarnings("checkstyle:ParameterNumber")
247266
private SubtitlesStream(@Nonnull final String id,
@@ -252,6 +271,7 @@ private SubtitlesStream(@Nonnull final String id,
252271
@Nonnull final String languageCode,
253272
final boolean autoGenerated,
254273
final boolean autoTranslated,
274+
@Nullable final String baseLanguageCode,
255275
@Nullable final String manifestUrl) throws ParsingException {
256276
super(id, content, isUrl, mediaFormat, deliveryMethod, manifestUrl);
257277
this.locale = LocaleCompat.forLanguageTag(languageCode).orElseThrow(
@@ -261,6 +281,13 @@ private SubtitlesStream(@Nonnull final String id,
261281
this.format = mediaFormat;
262282
this.autoGenerated = autoGenerated;
263283
this.autoTranslated = autoTranslated;
284+
if (baseLanguageCode == null) {
285+
this.baseLocale = null;
286+
} else {
287+
this.baseLocale = LocaleCompat.forLanguageTag(baseLanguageCode).orElseThrow(
288+
() -> new ParsingException(
289+
"not a valid locale language code: " + baseLanguageCode));
290+
}
264291
}
265292

266293
/**
@@ -337,6 +364,37 @@ public Locale getLocale() {
337364
return locale;
338365
}
339366

367+
/**
368+
* Get the {@link Locale baseLocale} from which the subtitles were automatically translated
369+
* into the current {@link #locale}.
370+
*
371+
* @return the {@link Locale baseLocale} for the subtitle translation
372+
* or {@code null} if the subtitle is not auto-translated
373+
*/
374+
@Nullable
375+
public Locale getBaseLocale() {
376+
return baseLocale;
377+
}
378+
379+
/**
380+
* Get the display name of the base language the subtitles were auto-translated from.
381+
*
382+
* @return the base language's display name, or {@code null} if there is no base locale
383+
*/
384+
@Nullable public String getDisplayBaseLanguageName() {
385+
return baseLocale == null ? null : baseLocale.getDisplayName(baseLocale);
386+
}
387+
388+
/**
389+
* Get the language tag stored in the {@code code} field.
390+
* NOTE(review): despite "base" in the name, this does not read {@link #baseLocale} - confirm intent.
391+
* @return the value of the {@code code} field
392+
*/
393+
public String getBaseLanguageTag() {
394+
return code;
395+
}
396+
397+
340398
/**
341399
* No subtitles which are currently extracted use an {@link ItagItem}, so {@code null} is
342400
* returned by this method.
@@ -348,4 +406,16 @@ public Locale getLocale() {
348406
public ItagItem getItagItem() {
349407
return null;
350408
}
409+
410+
/**
* Build a string listing the format, base locale, locale, auto-generated flag,
* auto-translated flag and code of this subtitles stream.
*
* @return the string representation of this {@link SubtitlesStream}
*/
@Override
411+
public String toString() {
412+
return "SubtitlesStream{"
413+
+ "format=" + format
414+
+ ", baseLocale=" + baseLocale
415+
+ ", locale=" + locale
416+
+ ", autoGenerated=" + autoGenerated
417+
+ ", autoTranslated=" + autoTranslated
418+
+ ", code='" + code + '\''
419+
+ '}';
420+
}
351421
}

extractor/src/main/java/org/schabi/newpipe/extractor/utils/LocaleCompat.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.schabi.newpipe.extractor.utils;
22

3+
import javax.annotation.Nonnull;
34
import java.util.Locale;
45
import java.util.Optional;
56

@@ -16,7 +17,7 @@ private LocaleCompat() {
1617

1718
// Source: The AndroidX LocaleListCompat class's private forLanguageTagCompat() method.
1819
// Use Locale.forLanguageTag() on Android API level >= 21 / Java instead.
19-
public static Optional<Locale> forLanguageTag(final String str) {
20+
public static Optional<Locale> forLanguageTag(@Nonnull final String str) {
2021
if (str.contains("-")) {
2122
final String[] args = str.split("-", -1);
2223
if (args.length > 2) {

0 commit comments

Comments
 (0)