# Install the dependencies this script needs: Kino for the UI widgets plus two
# local packages that are resolved relative to this file's directory.
Mix.install(
[
{:kino, "~> 0.9.1"},
{:youtube_organizer, path: Path.join([__DIR__, "youtube_organizer"])},
{:text_mining, path: Path.join([__DIR__, "text_mining"])}
],
# Configure Nx to use EXLA.Backend as its default backend.
# NOTE(review): :exla is not listed above - presumably it is pulled in
# transitively by one of the local packages; confirm.
config: [nx: [default_backend: EXLA.Backend]]
)
# Set up credentials using this guide (the link is missing from this export); the code below reads them from conf/creds.json.
# Location of the credentials file, relative to this file's directory.
creds_path = Path.join(__DIR__, "conf/creds.json")
# Reuse creds_path instead of rebuilding the same path a second time.
auth = YTOrg.AuthWrapper.new(creds_path)
:ok

# Fetch the playlists through a fetcher that carries the auth wrapper.
playlists =
  %YTOrg.YoutubePlaylistFetcher{auth: auth}
  |> YTOrg.YoutubePlaylistFetcher.fetch_youtube_playlists()

# Peek at the first video of the first playlist. Both matches are assertive:
# an empty playlist list (or an empty first playlist) raises a MatchError.
[playlist | _] = playlists
%YTOrg.YoutubePlaylist{videos: [vid | _]} = playlist
vid
defmodule KinoHelpers do
  @moduledoc """
  Small helpers that turn `YTOrg.YoutubeVideo` structs into Kino widgets.
  """

  alias YTOrg.YoutubeVideo

  @doc """
  Returns a markdown link whose text is the video title and whose target is
  the video URL.
  """
  def get_link_md(%YoutubeVideo{title: title, url: url}), do: "[#{title}](#{url})"

  @doc """
  Returns the video's markdown link wrapped in a `Kino.Markdown` widget.
  """
  def get_link_kino(%YoutubeVideo{} = vid) do
    link_md = get_link_md(vid)
    Kino.Markdown.new(link_md)
  end

  @doc """
  Looks up the `"url"` entry of the thumbnail stored under `size`
  (`"medium"` by default). Returns `nil` when either key is missing.
  """
  def get_thumbnail_url(%YoutubeVideo{thumbnails: thumbnails}, size \\ "medium") do
    get_in(thumbnails, [size, "url"])
  end

  @doc """
  Puts a link to `vid` in front of `layout` inside a new grid. When no video
  is given (`nil`), `layout` is returned untouched.
  """
  def maybe_add_preceding_link_kino(layout, nil), do: layout

  def maybe_add_preceding_link_kino(layout, %YoutubeVideo{} = vid) do
    link_kino = get_link_kino(vid)
    Kino.Layout.grid([link_kino, layout])
  end
end
defmodule UIComponent do
  @moduledoc """
  Groups the widgets that make up one interactive view: the form, the frame
  it renders into, and the form's inputs. All three fields are required.
  """

  @fields [:form, :frame, :inputs]

  @enforce_keys @fields
  defstruct @fields
end
defmodule YoutubePlaylistInterface do
  @moduledoc """
  Interactive Kino view over a list of `YTOrg.YoutubePlaylist` structs.

  The view consists of a form (a playlist selector plus a "display counts"
  checkbox) and a frame. Every form event re-renders the frame with either
  the selected playlist's videos or a per-playlist video count.
  """

  alias YTOrg.{AuthWrapper, YoutubePlaylist, YoutubePlaylistFetcher}

  import KinoHelpers,
    only: [get_link_md: 1, get_thumbnail_url: 1, maybe_add_preceding_link_kino: 2]

  defstruct [:playlists, :ui_component]

  @doc """
  Returns a map from playlist title to that playlist's videos.
  """
  def get_playlists_by_name(interface) do
    for %YoutubePlaylist{metadata: meta, videos: videos} <- interface.playlists, into: %{} do
      {meta.title, videos}
    end
  end

  @doc """
  Builds an interface from the credentials file at `google_creds_path`
  (defaults to `"creds.json"`): creates an auth wrapper, fetches the
  playlists through a fetcher carrying it, and passes them to `new/1`.
  """
  def from_google_creds(google_creds_path \\ "creds.json") do
    # The fetcher module lives under YTOrg and is built from an auth wrapper,
    # the same way the top of this file constructs it.
    auth = AuthWrapper.new(google_creds_path)

    %YoutubePlaylistFetcher{auth: auth}
    |> YoutubePlaylistFetcher.fetch_youtube_playlists()
    |> new()
  end

  @doc """
  Creates the form and frame for `playlists`, attaches the form listener and
  returns the interface struct. Nothing is rendered until `display/1` is
  called.
  """
  def new(playlists) do
    # Each select option is {value, label}; the title serves as both.
    playlist_options =
      for playlist <- playlists, do: {playlist.metadata.title, playlist.metadata.title}

    frame = Kino.Frame.new()

    inputs = [
      playlist_name: Kino.Input.select("playlist_name", playlist_options),
      display_counts: Kino.Input.checkbox("Display playlist counts")
    ]

    form = Kino.Control.form(inputs, submit: "Send", report_changes: true)
    ui_component = %UIComponent{inputs: inputs, frame: frame, form: form}

    wire_widgets(playlists, frame, form)

    %__MODULE__{playlists: playlists, ui_component: ui_component}
  end

  @doc """
  Subscribes to `form` events so that each event re-renders `frame`.
  Returns whatever `Kino.listen/2` returns.
  """
  def wire_widgets(playlists, frame, form) do
    playlists_by_name = Map.new(playlists, &{&1.metadata.title, &1})

    Kino.listen(form, fn event -> playlist_callback(playlists_by_name, frame, event) end)
  end

  # Checkbox off: render the videos of the selected playlist.
  defp playlist_callback(playlists_by_name, frame, %{
         data: %{playlist_name: playlist_name, display_counts: false}
       }) do
    playlist_kino = get_playlist_kino(playlists_by_name[playlist_name])
    Kino.Frame.render(frame, playlist_kino)
    :ok
  end

  # Checkbox on: render the video count of every playlist instead.
  defp playlist_callback(playlists_by_name, frame, %{data: %{display_counts: true}}) do
    Kino.Frame.render(frame, get_playlist_info_kino(playlists_by_name))
    :ok
  end

  @doc """
  Renders a playlist as a four-column grid in which every cell is a boxed
  grid holding the video's thumbnail image and its title link.

  When the playlist has a `representing_video`, a link to it is placed in
  front of the grid; when it is `nil`, the grid is returned as is.
  """
  def get_playlist_kino(%YoutubePlaylist{
        videos: videos,
        representing_video: representing_video
      }) do
    tiles =
      for vid <- videos do
        # Markdown image pointing at the video's thumbnail URL.
        image = Kino.Markdown.new("![](#{get_thumbnail_url(vid)})")
        title_with_link = vid |> get_link_md() |> Kino.Markdown.new()
        Kino.Layout.grid([image, title_with_link], boxed: true)
      end

    tiles
    |> Kino.Layout.grid(columns: 4)
    |> maybe_add_preceding_link_kino(representing_video)
  end

  @doc """
  Renders the form followed by the frame.
  """
  def display(%__MODULE__{} = interface) do
    Kino.render(interface.ui_component.form)
    Kino.render(interface.ui_component.frame)
  end

  # One "title: count" entry per playlist, separated by blank lines so that
  # markdown shows each entry as its own paragraph.
  defp playlist_info(playlists_by_name) do
    Enum.map_join(playlists_by_name, "\n\n", fn {_name, plist} ->
      "#{plist.metadata.title}: #{Enum.count(plist.videos)}"
    end)
  end

  defp get_playlist_info_kino(playlists_by_name) do
    playlists_by_name
    |> playlist_info()
    |> Kino.Markdown.new()
  end

  @doc """
  Appends `markdown_str`, rendered as markdown, to `frame`.
  """
  def add_markdown_to_frame(frame, markdown_str) do
    Kino.Frame.append(frame, Kino.Markdown.new(markdown_str))
  end
end
# Build the browsing UI for the fetched playlists and show it.
interface = playlists |> YoutubePlaylistInterface.new()
YoutubePlaylistInterface.display(interface)
:ok
defmodule YTOrg.PlaylistClusterer do
  @moduledoc """
  Holds a fitted clusterer together with the playlists it produced.
  """

  alias TextMining.{EmbeddingClusterer, EmbeddingComparator}
  alias YTOrg.YTTextMining

  defstruct [:clusterer, :clustered_playlists]

  @doc """
  Clusters the videos of `input_playlists` on `field` and returns a struct
  with the fitted clusterer and the resulting playlists.

  `add_most_central_videos` is forwarded unchanged to
  `YTOrg.YTTextMining.get_clustered_playlists/4`.
  """
  def new(input_playlists, field, add_most_central_videos) do
    # Only the embedder of a freshly built comparator is needed here.
    %EmbeddingComparator{text_embedder: embedder} = EmbeddingComparator.new()
    unfitted = EmbeddingClusterer.new(embedder)

    {fitted, clustered} =
      YTTextMining.get_clustered_playlists(
        unfitted,
        input_playlists,
        field,
        add_most_central_videos
      )

    %__MODULE__{clusterer: fitted, clustered_playlists: clustered}
  end
end
defmodule YTOrg.YTTextMining do
  @moduledoc """
  Utils for converting YT data to TextMining package's documents
  """
  alias TextMining.{Document, EmbeddingClusterer}
  alias YTOrg.{YoutubePlaylist, YoutubeVideo}

  @doc """
  Flattens `playlists` into a single list of videos, clusters them on
  `text_field` (`:title` or `:description`) and returns
  `{fitted_clusterer, clustered_playlists}`, one playlist per cluster.

  With `add_most_central_videos` set to `false` the playlists are returned
  as produced by the clustering. With `true`, each playlist additionally
  gets its `representing_video` set to the video that
  `TextMining.TextEmbedder.get_closest_index/3` selects for that cluster.
  """
  def get_clustered_playlists(clusterer, playlists, text_field, add_most_central_videos)

  # Base case: plain clustering. The `true` clause below delegates here, so
  # this clause has to be the one matching `false`.
  def get_clustered_playlists(clusterer, playlists, text_field, false) do
    documents = get_documents_from_playlists(playlists, text_field)

    # NOTE(review): 10 is presumably the number of clusters - confirm against
    # EmbeddingClusterer.fit_clustering/3.
    fitted_clusterer = EmbeddingClusterer.fit_clustering(clusterer, documents, 10)

    clustered_playlists =
      fitted_clusterer
      |> EmbeddingClusterer.get_clustered_documents(documents)
      |> playlists_from_clustering_result()

    {fitted_clusterer, clustered_playlists}
  end

  def get_clustered_playlists(clusterer, playlists, text_field, true) do
    {fitted_clusterer, raw_clustered_playlists} =
      get_clustered_playlists(clusterer, playlists, text_field, false)

    clustered_playlists =
      add_closest_centroid_videos(fitted_clusterer, raw_clustered_playlists, text_field)

    {fitted_clusterer, clustered_playlists}
  end

  @doc """
  Converts a video into a `TextMining.Document`. The document text is the
  video's title or description depending on the second argument; the title
  is used as the id in both cases and the video itself is kept as metadata.
  """
  def video_to_document(%YoutubeVideo{title: title} = record, :title) do
    %Document{text: title, id: title, metadata: record}
  end

  def video_to_document(
        %YoutubeVideo{title: title, description: description} = record,
        :description
      ) do
    %Document{text: description, id: title, metadata: record}
  end

  # Builds one playlist per cluster; the cluster id doubles as id and title.
  defp playlists_from_clustering_result(clustered_documents) do
    for {cluster_id, documents} <- clustered_documents do
      %YoutubePlaylist{
        metadata: %{id: cluster_id, title: cluster_id},
        videos: Enum.map(documents, & &1.metadata)
      }
    end
  end

  defp flatten_playlists(playlists) do
    Enum.flat_map(playlists, & &1.videos)
  end

  defp get_documents_from_playlists(playlists, text_field) do
    for video <- flatten_playlists(playlists) do
      video_to_document(video, text_field)
    end
  end

  # Sets `representing_video` on every clustered playlist.
  # NOTE(review): this pairs the i-th playlist with the i-th row of
  # `clusters`; it assumes the playlists arrive in cluster order with no
  # cluster missing - confirm against get_clustered_documents/2.
  defp add_closest_centroid_videos(clusterer, clustered_playlists, text_field) do
    clusters = clusterer.model.clusters
    # Assertive match: the shape must be a two-element tuple.
    {_n_clusters, _} = clusters.shape
    embedder = clusterer.text_embedder

    for {playlist, i} <- Enum.with_index(clustered_playlists) do
      # Slice with a range so the row keeps its leading axis.
      cluster = clusters[i..i]
      documents = get_documents_from_playlists([playlist], text_field)

      closest_index =
        TextMining.TextEmbedder.get_closest_index(embedder, cluster, documents)

      closest_video = Enum.at(documents, closest_index)

      # Struct update rather than Map.put/3, so an unknown field would raise.
      %{playlist | representing_video: closest_video.metadata}
    end
  end
end
# Cluster all videos by title; `true` also requests a representing video for
# every resulting playlist.
title_playlist_clusterer = YTOrg.PlaylistClusterer.new(playlists, :title, true)
:ok

# Bind the names the statements below rely on; they were referenced without
# ever being defined.
title_clustered_playlists = title_playlist_clusterer.clustered_playlists
clusterer = title_playlist_clusterer.clusterer

clustered_interface = YoutubePlaylistInterface.new(title_clustered_playlists)
clustered_interface |> YoutubePlaylistInterface.display()
:ok
clustered_interface |> YoutubePlaylistInterface.display()
interface = YoutubePlaylistInterface.new(title_clustered_playlists)
:ok

# Inspect the fitted model of the title clusterer.
clusterer.model

# Same flow, clustering on the video descriptions instead of the titles.
description_playlist_clusterer = YTOrg.PlaylistClusterer.new(playlists, :description, true)
description_clustered_playlists = description_playlist_clusterer.clustered_playlists
interface = YoutubePlaylistInterface.new(description_clustered_playlists)
:ok