Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/src/processing/match-action.js
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ export default function({
case "ok":
case "xiaohongshu":
case "newgrounds":
case "sora":
params = { type: "proxy" };
break;

Expand Down
9 changes: 9 additions & 0 deletions api/src/processing/match.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import facebook from "./services/facebook.js";
import bluesky from "./services/bluesky.js";
import xiaohongshu from "./services/xiaohongshu.js";
import newgrounds from "./services/newgrounds.js";
import sora from "./services/sora.js";

let freebind;

Expand Down Expand Up @@ -276,6 +277,14 @@ export default async function({ host, patternMatch, params, authType }) {
});
break;

case "sora":
r = await sora({
postId: patternMatch.postId,
quality: params.videoQuality,
isAudioOnly,
});
break;

default:
return createResponse("error", {
code: "error.api.service.unsupported"
Expand Down
4 changes: 4 additions & 0 deletions api/src/processing/service-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ export const services = {
"v/:id"
],
subdomains: ["music", "m"],
},
sora: {
patterns: ["p/:postId"],
altDomains: ["sora.chatgpt.com"]
}
}

Expand Down
3 changes: 3 additions & 0 deletions api/src/processing/service-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,7 @@ export const testers = {

"youtube": pattern =>
pattern.id?.length <= 11,

"sora": pattern =>
pattern.postId?.length <= 64,
}
173 changes: 173 additions & 0 deletions api/src/processing/services/sora.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import { genericUserAgent } from "../../config.js";

/**
 * Pauses an async flow for a while.
 *
 * @param {number} ms - how long to wait, in milliseconds
 * @returns {Promise<void>} settles (with no value) once the timer fires
 */
const delay = (ms) =>
    new Promise((wake) => {
        setTimeout(wake, ms);
    });

/**
 * Decides whether a response should be treated as a Cloudflare block or
 * challenge rather than the page that was asked for.
 *
 * A response counts as a challenge when:
 *  - its status is 403 or 503, or
 *  - it carries the `cf-mitigated: challenge` header, which Cloudflare
 *    attaches to challenge pages.
 *
 * A plain 200 that merely reports `server: cloudflare` is NOT a challenge:
 * that header is present on every page proxied through Cloudflare, so
 * keying on it would make each successful request look like a failure.
 *
 * @param {{ status: number, headers: { get(name: string): string | null | undefined } }} response
 * @returns {boolean} true when the response looks like a block/challenge
 */
const isCloudflareChallenge = (response) => {
    if (response.status === 403 || response.status === 503) {
        return true;
    }

    const mitigation = response.headers.get("cf-mitigated");
    return mitigation?.toLowerCase() === "challenge";
};

/**
 * `fetch` wrapper that retries when the request throws or when the response
 * is classified as a Cloudflare challenge by `isCloudflareChallenge`.
 *
 * Between attempts it waits `1000 * attempt` milliseconds, i.e. a linear
 * backoff of 1s, 2s, 3s, ...
 *
 * @param {string | URL} url - resource to request
 * @param {object} options - passed to `fetch` unchanged on every attempt
 * @param {number} [maxRetries=3] - total number of attempts; values below 1
 *   (or non-numeric values) are treated as a single attempt
 * @returns {Promise<Response>} the first non-challenge response, or whatever
 *   the final attempt returned (which may still be a challenge)
 * @throws whatever the final attempt threw, if it threw
 */
const fetchWithRetry = async (url, options, maxRetries = 3) => {
    // Guarantee at least one attempt so the function can never fall out of
    // the loop without having either returned a response or thrown an error.
    const attempts = Math.max(1, Math.floor(maxRetries) || 1);

    for (let attempt = 1; attempt <= attempts; attempt++) {
        const isLastAttempt = attempt === attempts;

        try {
            const response = await fetch(url, options);

            if (isLastAttempt || !isCloudflareChallenge(response)) {
                return response;
            }

            // The challenge body is never read; cancel it so the underlying
            // connection is released before the next attempt. A failure here
            // lands in the catch below and simply proceeds to the retry.
            await response.body?.cancel();
        } catch (error) {
            if (isLastAttempt) {
                throw error;
            }
        }

        // Linear backoff before the next attempt.
        await delay(1000 * attempt);
    }
};

/**
 * Service entry point: resolves a Sora post to a downloadable video.
 *
 * @param {object} obj - request description; only `postId` is read here, the
 *   whole object is passed on to `handlePostUrl`
 * @returns {Promise<object>} the result of `handlePostUrl`, or
 *   `{ error: "fetch.empty" }` when no post id was supplied, or
 *   `{ error: "fetch.fail" }` when `handlePostUrl` throws
 */
export default async function sora(obj) {
    const postId = obj?.postId;
    if (!postId) {
        return { error: "fetch.empty" };
    }

    try {
        // For /p/ (post) URLs, use HTML parsing
        return await handlePostUrl(postId, obj);
    } catch (error) {
        // Log the failure and report a generic error to the caller.
        console.error("Sora service error:", error);
        return { error: "fetch.fail" };
    }
}

// The only host a discovered video URL may point at; anything else is dropped
// before it can be handed to the proxy.
const SORA_VIDEO_HOST = "videos.openai.com";

/**
 * Undoes the escaping a URL may carry when it is lifted out of HTML
 * attributes or inline JSON: `\uXXXX` escapes, `\/` slashes and `&amp;`.
 * Anything after a decoded delimiter (quote, angle bracket, whitespace) and
 * any trailing backslashes (left over from an escaped closing quote) are cut.
 *
 * @param {string} raw - URL text exactly as it appeared in the page
 * @returns {string} the unescaped URL text
 */
function decodeEmbeddedUrl(raw) {
    return raw
        .replace(/\\+u([\da-f]{4})/gi, (_, hex) =>
            String.fromCharCode(Number.parseInt(hex, 16)),
        )
        .replace(/\\+\//g, "/")
        .replace(/&amp;/g, "&")
        .split(/["'<>\s]/, 1)[0]
        .replace(/\\+$/, "");
}

/**
 * Finds an .mp4 URL hosted on SORA_VIDEO_HOST anywhere in the page (markup or
 * script content, escaped or not). URLs under `/vg-assets/` win over other
 * paths; otherwise the first one in document order is used.
 *
 * @param {string} html - full page source
 * @returns {string | undefined} normalized https URL, or undefined if none
 */
function extractVideoUrl(html) {
    // Slashes may be JSON-escaped ("\/"), possibly more than once.
    const candidatePattern =
        /https:(?:\\*\/){2}videos\.openai\.com\\*\/[^"'<>\s]*?\.mp4[^"'<>\s]*/gi;

    const trusted = [];
    for (const [raw] of html.matchAll(candidatePattern)) {
        let parsed;
        try {
            parsed = new URL(decodeEmbeddedUrl(raw));
        } catch {
            continue; // not a well-formed URL after unescaping
        }

        // Compare the parsed hostname, never a substring of the URL text.
        if (parsed.protocol === "https:" && parsed.hostname === SORA_VIDEO_HOST) {
            trusted.push(parsed);
        }
    }

    const chosen =
        trusted.find((url) => url.pathname.startsWith("/vg-assets/")) ?? trusted[0];
    return chosen?.href;
}

/**
 * Reads the page's <title> text and strips the " - Sora" / " | Sora" suffix.
 *
 * @param {string} html - full page source
 * @returns {string | undefined} cleaned title, or undefined if there is none
 */
function extractTitle(html) {
    const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title\s*>/i);
    if (!titleMatch) {
        return undefined;
    }
    return titleMatch[1].replace(" - Sora", "").replace(" | Sora", "").trim();
}

/**
 * Downloads the public page of a Sora post and extracts its video.
 *
 * @param {string} postId - id taken from a `/p/:postId` URL
 * @param {object} obj - original request object; accepted for interface
 *   compatibility, not read by this function
 * @returns {Promise<object>} a proxy descriptor
 *   `{ type, urls, filename, fileMetadata: { title } }`, or
 *   `{ error: "fetch.fail" }` when the page request is not OK, or
 *   `{ error: "fetch.empty" }` when no video URL is found in the page
 */
async function handlePostUrl(postId, obj) {
    const targetUrl = `https://sora.com/p/${postId}`;

    // Browser-like navigation headers (presumably to look like an ordinary
    // page visit to the site's bot protection; confirm before trimming).
    const res = await fetchWithRetry(targetUrl, {
        headers: {
            "user-agent": genericUserAgent,
            accept:
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "en-US,en;q=0.9",
            "accept-encoding": "gzip, deflate, br",
            "sec-ch-ua":
                '"Google Chrome";v="138", "Chromium";v="138", "Not=A?Brand";v="99"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "none",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "cache-control": "max-age=0",
            dnt: "1",
        },
    });

    if (!res.ok) {
        return { error: "fetch.fail" };
    }

    const html = await res.text();

    const videoUrl = extractVideoUrl(html);
    if (!videoUrl) {
        return { error: "fetch.empty" };
    }

    const title = extractTitle(html);

    // Generate filename from the id, keeping only filename-safe characters
    const cleanId = postId.replace(/[^a-zA-Z0-9_-]/g, "");
    const videoFilename = `sora_${cleanId}.mp4`;

    return {
        type: "proxy",
        urls: videoUrl,
        filename: videoFilename,
        fileMetadata: {
            title: title || `Sora Video ${cleanId}`,
        },
    };
}
6 changes: 6 additions & 0 deletions api/src/processing/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ function aliasURL(url) {
}
break;

case "chatgpt":
if (url.hostname === 'sora.chatgpt.com') {
url.hostname = 'sora.com';
}
break;

case "redd":
/* reddit short video links can be treated by changing https://v.redd.it/<id>
to https://reddit.com/video/<id>.*/
Expand Down