Skip to content

Commit 10febf4

Browse files
authored
Merge pull request #13 from quilime/main
update scripts to skip existing files
2 parents f37cf9d + 5318bc9 commit 10febf4

5 files changed

Lines changed: 34 additions & 16 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# build folders
2+
# uncommented lines to ignore build folders since we're managing them manually now
23
data
34
dist
45

bin/gen-all-topics.sh

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
11
#!/bin/bash
2+
set -x
23

34
mkdir -p ./data/topics
45

5-
./bin/search-caption-string.sh ./data/videos "artificial intelligence" " ai " > ./data/topics/ai.txt
6-
./bin/search-caption-string.sh ./data/videos " art " "fine art" "gallery" > ./data/topics/art.txt
7-
./bin/search-caption-string.sh ./data/videos "biology" "cell" "genetics" "evolution" "physiology" "biochemistry" "adaptation" "reproduction" "biodiversity" "microbio" "molecular" > ./data/topics/biology.txt
8-
./bin/search-caption-string.sh ./data/videos "decolonialism" "neocolonialism" "colonial" "indigenous" "imperialism" "sovereignty" > ./data/topics/decolonialism.txt
9-
./bin/search-caption-string.sh ./data/videos "design" "graphic design" "web design" "product design" > ./data/topics/design.txt
10-
./bin/search-caption-string.sh ./data/videos "ecology" "earth" "climate change" "ecosystem" "biodiversity" "conservation" "environment" > ./data/topics/ecology.txt
11-
./bin/search-caption-string.sh ./data/videos "history" "historical" "past" "era" "ancient" "civilization" "culture" "heritage" > ./data/topics/history.txt
12-
./bin/search-caption-string.sh ./data/videos "indigenous" "wisdom" "elder" "past" "native" "heritage" "spiritual" > ./data/topics/indigenous.txt
13-
./bin/search-caption-string.sh ./data/videos "machine learning" " ml " "neural net" > ./data/topics/machine-learning.txt
14-
./bin/search-caption-string.sh ./data/videos " ar " " vr " " xr " "augmented reality" "mixed reality" "virtual reality" "immersive" "virtual world" > ./data/topics/metaverse.txt
15-
./bin/search-caption-string.sh ./data/videos "music" "song" "concert" "composition" "melody" > ./data/topics/music.txt
16-
./bin/search-caption-string.sh ./data/videos "philosophy" "metaphysics" "ethics" "aesthetic" "phenomenology" > ./data/topics/philosophy.txt
17-
./bin/search-caption-string.sh ./data/videos "software" "programming" "code" "algorithm" "open source" > ./data/topics/software.txt
18-
./bin/search-caption-string.sh ./data/videos "systems" "chaos" "complexity" "modeling" "simulation" "pattern" > ./data/topics/systems.txt
6+
./bin/search-caption-string.sh ./data/videos "artificial intelligence" " ai " " llm " "large language model" "diffusion model" "generative ai" "stable diffusion" "midjourney" "dalle" "chatgpt" " gpt" "transformer" "generative model" "deep learning" "computer vision" "natural language" "text to image" "image generation" "prompt engineering" "fine-tuning" "training data" "algorithmic bias" "ai ethics" "synthetic media" "deepfake" "ai art" "creative ai" "latent space" "embedding" "attention mechanism" "foundation model" > ./data/topics/ai.txt
7+
./bin/search-caption-string.sh ./data/videos " art " "fine art" "gallery" "artist" "artwork" "painting" "sculpture" "installation" "performance art" "contemporary art" "visual art" "exhibition" "curator" "creative practice" "artistic" "multimedia art" "digital art" "new media" "video art" "sound art" "interactive art" > ./data/topics/art.txt
8+
./bin/search-caption-string.sh ./data/videos "biology" "cell" "genetics" "evolution" "physiology" "biochemistry" "adaptation" "reproduction" "biodiversity" "microbio" "molecular" "dna" "organism" "protein" "neuroscience" "bioinformatics" "genome" "cellular" "biological" "biotechnology" "synthetic biology" "crispr" "gene editing" > ./data/topics/biology.txt
9+
./bin/search-caption-string.sh ./data/videos "decolonialism" "neocolonialism" "colonial" "indigenous" "imperialism" "sovereignty" "postcolonial" "decolonize" "anticolonial" "colonization" "self-determination" "land back" "reparations" "settler colonialism" "cultural appropriation" "oppression" "liberation" "resistance" > ./data/topics/decolonialism.txt
10+
./bin/search-caption-string.sh ./data/videos "design" "graphic design" "web design" "product design" "user experience" " ux " " ui " "interface" "interaction design" "design thinking" "typography" "visual design" "prototype" "user interface" "usability" "design system" "human-centered" "service design" "speculative design" > ./data/topics/design.txt
11+
./bin/search-caption-string.sh ./data/videos "ecology" "earth" "climate change" "ecosystem" "biodiversity" "conservation" "environment" "sustainability" "renewable" "carbon" "global warming" "environmental" "habitat" "species" "pollution" "deforestation" "ocean" "atmosphere" "anthropocene" "planetary" "ecological" > ./data/topics/ecology.txt
12+
./bin/search-caption-string.sh ./data/videos "history" "historical" "past" "era" "ancient" "civilization" "culture" "heritage" "archive" "memory" "tradition" "legacy" "chronology" "ancestor" "historic" "century" "archeology" "anthropology" "cultural history" "oral history" "collective memory" > ./data/topics/history.txt
13+
./bin/search-caption-string.sh ./data/videos "indigenous" "wisdom" "elder" "native" "heritage" "spiritual" "aboriginal" "first nations" "tribal" "ceremony" "sacred" "ancestral" "traditional knowledge" "land stewardship" "oral tradition" "indigenous people" "native american" "cultural practices" > ./data/topics/indigenous.txt
14+
./bin/search-caption-string.sh ./data/videos "machine learning" " ml " "neural net" "supervised learning" "unsupervised learning" "reinforcement learning" "convolutional" "recurrent neural" "backpropagation" "classification" "regression" "clustering" "gradient descent" "overfitting" "model training" "feature extraction" "data science" > ./data/topics/machine-learning.txt
15+
./bin/search-caption-string.sh ./data/videos " ar " " vr " " xr " "augmented reality" "mixed reality" "virtual reality" "immersive" "virtual world" "metaverse" "headset" "oculus" "hololens" "spatial computing" "3d environment" "avatar" "virtual space" "haptic" "motion tracking" "360 degree" > ./data/topics/metaverse.txt
16+
./bin/search-caption-string.sh ./data/videos "music" "song" "concert" "composition" "melody" "musician" "sound" "audio" "acoustic" "harmony" "rhythm" "synthesizer" "electronic music" "sonic" "instrument" "performance" "musical" "soundtrack" "experimental music" "improvisation" > ./data/topics/music.txt
17+
./bin/search-caption-string.sh ./data/videos "philosophy" "metaphysics" "ethics" "aesthetic" "phenomenology" "epistemology" "ontology" "existential" "consciousness" "perception" "moral" "philosophical" "critique" "discourse" "theory" "dialectic" "rationalism" "empiricism" "postmodern" > ./data/topics/philosophy.txt
18+
./bin/search-caption-string.sh ./data/videos "software" "programming" "code" "algorithm" "open source" "developer" "application" "platform" "framework" "library" "api" "database" "frontend" "backend" "debugging" "version control" "git" "coding" "scripting" "computational" > ./data/topics/software.txt
19+
./bin/search-caption-string.sh ./data/videos "systems" "chaos" "complexity" "modeling" "simulation" "pattern" "emergence" "feedback loop" "network" "dynamics" "self-organization" "interconnected" "systemic" "complex system" "nonlinear" "adaptive" "holistic" "cybernetics" "agent-based" > ./data/topics/systems.txt
1920

2021
# convert to json
2122
for file in $(find ./data/topics/ -type f -name "*.txt"); do

bin/get-all-metadata.sh

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ if [[ -z "$DEST" ]]; then
4545
fi
4646

4747
# Build yt-dlp options
48-
YTDLP_OPTS="-r 50K --ignore-errors --write-info-json --skip-download"
48+
YTDLP_OPTS="-r 50K --ignore-errors --write-info-json --skip-download --js-runtimes node --remote-components ejs:github"
4949
if [[ -n "$BROWSER" ]]; then
5050
YTDLP_OPTS="$YTDLP_OPTS --cookies-from-browser $BROWSER"
5151
echo "Using cookies from $BROWSER"
@@ -70,7 +70,16 @@ for i in "${!years[@]}"; do
7070
fi
7171

7272
mkdir -p $DEST/$year
73-
echo "Processing playlists $year"
73+
74+
# Check if metadata already exists
75+
existing_files=$(find "$DEST/$year" -type f -name "*.info.json" 2>/dev/null | wc -l)
76+
if [[ $existing_files -gt 0 ]]; then
77+
echo "Processing playlists $year (skipping $existing_files existing files)"
78+
YTDLP_OPTS="$YTDLP_OPTS --no-overwrites"
79+
else
80+
echo "Processing playlists $year"
81+
fi
82+
7483
{
7584
yt-dlp $YTDLP_OPTS -o "$DEST/$year/%(title)s [%(id)s].%(ext)s" "$url"
7685
} || {

bin/get-youtube-subs.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ if [[ -z "$YTID" ]]; then
3636
exit 1
3737
fi
3838

39+
# Check if VTT already exists
40+
if [[ -n "$DEST" && -f "${DEST}.en.vtt" ]]; then
41+
echo "Skipping $YTID (VTT exists)"
42+
exit 0
43+
fi
44+
3945
# Build yt-dlp options
4046
YTDLP_OPTS="--sub-lan=en --write-auto-sub --skip-download"
4147
if [[ -n "$BROWSER" ]]; then

dist/output.css

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)