Skip to content

Latest commit

 

History

History
337 lines (264 loc) · 8.8 KB

yt_playlists.livemd

File metadata and controls

337 lines (264 loc) · 8.8 KB

YouTube playlists

# Notebook dependencies: Kino for the interactive widgets, plus the two local
# projects that live in directories next to this notebook.
# NOTE(review): the config selects EXLA.Backend for Nx, but :exla is not listed
# here — presumably it is pulled in by one of the path dependencies; confirm.
Mix.install(
  [
    {:kino, "~> 0.9.1"},
    {:youtube_organizer, path: Path.join(__DIR__, "youtube_organizer")},
    {:text_mining, path: Path.join(__DIR__, "text_mining")}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)

Section

Set up the Google API credentials by following this guide, then save them as `conf/creds.json` next to this notebook.

# Location of the Google API credentials file, relative to this notebook.
creds_path = Path.join(__DIR__, "conf/creds.json")

# Reuse `creds_path` instead of rebuilding the same path a second time.
auth = YTOrg.AuthWrapper.new(creds_path)

# Return :ok so the cell output does not print the auth wrapper.
:ok
# Fetch every playlist using the authenticated fetcher.
playlists =
  YTOrg.YoutubePlaylistFetcher.fetch_youtube_playlists(%YTOrg.YoutubePlaylistFetcher{auth: auth})

# Peek at the first video of the first playlist to inspect the data shape.
# Raises MatchError when there are no playlists or the first one has no videos.
[playlist | _] = playlists
%YTOrg.YoutubePlaylist{videos: [vid | _]} = playlist
vid
defmodule KinoHelpers do
  @moduledoc """
  Helpers that turn `YTOrg.YoutubeVideo` structs into Markdown strings and Kino
  widgets.
  """
  alias YTOrg.YoutubeVideo

  @doc """
  Returns a Markdown link whose text is the video title and whose target is the
  video URL.
  """
  def get_link_md(%YoutubeVideo{} = video), do: "[#{video.title}](#{video.url})"

  @doc """
  Returns the Markdown link for the video wrapped in a `Kino.Markdown` widget.
  """
  def get_link_kino(%YoutubeVideo{} = video) do
    Kino.Markdown.new(get_link_md(video))
  end

  @doc """
  Looks up the `"url"` entry of the thumbnail stored under `size` (defaults to
  `"medium"`). Returns `nil` when either key is missing.
  """
  def get_thumbnail_url(%YoutubeVideo{} = video, size \\ "medium") do
    get_in(video.thumbnails, [size, "url"])
  end

  @doc """
  Puts a link to the given video in a grid ahead of `layout`. When the video is
  `nil`, `layout` is returned unchanged.
  """
  def maybe_add_preceding_link_kino(layout, nil), do: layout

  def maybe_add_preceding_link_kino(layout, %YoutubeVideo{} = video) do
    Kino.Layout.grid([get_link_kino(video), layout])
  end
end
defmodule UIComponent do
  @moduledoc """
  Groups the pieces of one interactive widget: the form control, the frame it
  renders into, and the form's inputs. All three keys are required.
  """
  @enforce_keys [:form, :frame, :inputs]
  defstruct @enforce_keys
end

defmodule YoutubePlaylistInterface do
  @moduledoc """
  Livebook UI for browsing a list of `YTOrg.YoutubePlaylist` structs.

  A form (playlist selector plus a "Display playlist counts" checkbox) drives a
  `Kino.Frame`. Depending on the checkbox, the frame shows either a thumbnail
  grid for the selected playlist or the number of videos in every playlist.
  """
  alias YTOrg.{AuthWrapper, YoutubePlaylist, YoutubePlaylistFetcher}

  import KinoHelpers,
    only: [get_link_md: 1, get_thumbnail_url: 1, maybe_add_preceding_link_kino: 2]

  defstruct [:playlists, :ui_component]

  @doc """
  Returns a map from playlist title to that playlist's list of videos.
  """
  def get_playlists_by_name(interface) do
    for %YoutubePlaylist{metadata: meta, videos: videos} <- interface.playlists, into: %{} do
      {meta.title, videos}
    end
  end

  @doc """
  Fetches the playlists with the credentials stored at `google_creds_path` and
  builds an interface for them.

  The fetcher is built the same way as elsewhere in this notebook: an
  `YTOrg.AuthWrapper` created from the credentials path is placed in a
  `YTOrg.YoutubePlaylistFetcher` struct.
  """
  def from_google_creds(google_creds_path \\ "creds.json") do
    auth = AuthWrapper.new(google_creds_path)

    %YoutubePlaylistFetcher{auth: auth}
    |> YoutubePlaylistFetcher.fetch_youtube_playlists()
    |> new()
  end

  @doc """
  Builds the form and frame for `playlists`, subscribes the frame to form
  events, and returns the interface struct. Nothing is rendered until
  `display/1` is called.
  """
  def new(playlists) do
    # The playlist title serves both as the option value and as its label.
    playlist_options =
      for playlist <- playlists, do: {playlist.metadata.title, playlist.metadata.title}

    frame = Kino.Frame.new()

    inputs = [
      playlist_name: Kino.Input.select("playlist_name", playlist_options),
      display_counts: Kino.Input.checkbox("Display playlist counts")
    ]

    form = Kino.Control.form(inputs, submit: "Send", report_changes: true)
    wire_widgets(playlists, frame, form)

    %__MODULE__{
      playlists: playlists,
      ui_component: %UIComponent{inputs: inputs, frame: frame, form: form}
    }
  end

  @doc """
  Subscribes to `form` events so that each event re-renders `frame`.

  Playlists are indexed by title, so playlists sharing a title collapse to one
  entry (the last one wins).
  """
  def wire_widgets(playlists, frame, form) do
    playlists_by_name = Map.new(playlists, &{&1.metadata.title, &1})

    Kino.listen(form, fn event -> playlist_callback(playlists_by_name, frame, event) end)
  end

  # Checkbox off: show the thumbnail grid of the selected playlist.
  defp playlist_callback(playlists_by_name, frame, %{
         data: %{playlist_name: playlist_name, display_counts: false}
       }) do
    Kino.Frame.render(frame, get_playlist_kino(playlists_by_name[playlist_name]))
    :ok
  end

  # Checkbox on: show the video count of every playlist; the selection is ignored.
  defp playlist_callback(playlists_by_name, frame, %{data: %{display_counts: true}}) do
    Kino.Frame.render(frame, get_playlist_info_kino(playlists_by_name))
    :ok
  end

  @doc """
  Builds a 4-column grid with one boxed cell (thumbnail plus title link) per
  video. When the playlist has a `representing_video`, a link to it is placed
  ahead of the grid.
  """
  def get_playlist_kino(%YoutubePlaylist{videos: videos, representing_video: representing_video}) do
    cells =
      for vid <- videos do
        image = Kino.Markdown.new("![](#{get_thumbnail_url(vid)})")
        title_with_link = vid |> get_link_md() |> Kino.Markdown.new()
        Kino.Layout.grid([image, title_with_link], boxed: true)
      end

    cells
    |> Kino.Layout.grid(columns: 4)
    |> maybe_add_preceding_link_kino(representing_video)
  end

  @doc """
  Renders the form followed by the output frame.
  """
  def display(%__MODULE__{} = interface) do
    Kino.render(interface.ui_component.form)
    Kino.render(interface.ui_component.frame)
  end

  # One "title: count" entry per playlist, separated by blank lines so Markdown
  # renders each entry as its own paragraph.
  defp playlist_info(playlists_by_name) do
    Enum.map_join(playlists_by_name, "\n\n", fn {_title, plist} ->
      "#{plist.metadata.title}: #{Enum.count(plist.videos)}"
    end)
  end

  defp get_playlist_info_kino(playlists_by_name) do
    playlists_by_name
    |> playlist_info()
    |> Kino.Markdown.new()
  end

  @doc """
  Appends `markdown_str`, rendered as Markdown, to `frame`.
  """
  def add_markdown_to_frame(frame, markdown_str) do
    Kino.Frame.append(frame, Kino.Markdown.new(markdown_str))
  end
end
# Build the browsing UI for the fetched playlists and render it.
interface = YoutubePlaylistInterface.new(playlists)
YoutubePlaylistInterface.display(interface)
:ok

Clustering videos into playlists

defmodule YTOrg.PlaylistClusterer do
  @moduledoc """
  Holds a fitted embedding clusterer together with the playlists produced by
  clustering.
  """
  alias TextMining.{EmbeddingClusterer, EmbeddingComparator}
  alias YTOrg.YTTextMining

  defstruct [:clusterer, :clustered_playlists]

  @doc """
  Clusters the videos of `input_playlists` and returns a `%YTOrg.PlaylistClusterer{}`.

    * `input_playlists` - the playlists whose videos are clustered
    * `field` - which video text to use; passed through to
      `YTOrg.YTTextMining.get_clustered_playlists/4` (`:title` or `:description`)
    * `add_most_central_videos` - boolean, also passed through unchanged
  """
  def new(input_playlists, field, add_most_central_videos) do
    # Only the text embedder of a fresh comparator is needed here.
    %EmbeddingComparator{text_embedder: embedder} = EmbeddingComparator.new()

    {clusterer, clustered_playlists} =
      embedder
      |> EmbeddingClusterer.new()
      |> YTTextMining.get_clustered_playlists(input_playlists, field, add_most_central_videos)

    %__MODULE__{clusterer: clusterer, clustered_playlists: clustered_playlists}
  end
end

defmodule YTOrg.YTTextMining do
  @moduledoc """
  Utils for converting YT data to TextMining package's documents
  """
  alias TextMining.{Document, EmbeddingClusterer, TextEmbedder}
  alias YTOrg.{YoutubePlaylist, YoutubeVideo}

  # Number of clusters requested from the clusterer.
  @n_clusters 10

  @doc """
  Flattens `playlists` into one list of videos, clusters them by `text_field`
  and returns `{fitted_clusterer, clustered_playlists}`.

  The last argument selects whether each resulting playlist also gets a
  `:representing_video` (the video picked by
  `TextMining.TextEmbedder.get_closest_index/3` for that cluster):

    * `false` - plain clustering only
    * `true` - plain clustering, then the representing video is added

  Any other value raises `FunctionClauseError`.
  """
  def get_clustered_playlists(clusterer, playlists, text_field, false) do
    documents = get_documents_from_playlists(playlists, text_field)
    fitted_clusterer = EmbeddingClusterer.fit_clustering(clusterer, documents, @n_clusters)

    clustered_playlists =
      fitted_clusterer
      |> EmbeddingClusterer.get_clustered_documents(documents)
      |> playlists_from_clustering_result()

    {fitted_clusterer, clustered_playlists}
  end

  def get_clustered_playlists(clusterer, playlists, text_field, true) do
    # Delegate to the plain clause first, then decorate its result.
    {fitted_clusterer, raw_clustered_playlists} =
      get_clustered_playlists(clusterer, playlists, text_field, false)

    clustered_playlists =
      add_closest_centroid_videos(fitted_clusterer, raw_clustered_playlists, text_field)

    {fitted_clusterer, clustered_playlists}
  end

  @doc """
  Converts a video into a `TextMining.Document`.

  The document text is the video title (`:title`) or description
  (`:description`); in both cases the title is the document id and the whole
  video struct is stored as metadata.
  """
  def video_to_document(%YoutubeVideo{title: title} = record, :title) do
    %Document{text: title, id: title, metadata: record}
  end

  def video_to_document(
        %YoutubeVideo{title: title, description: description} = record,
        :description
      ) do
    %Document{text: description, id: title, metadata: record}
  end

  # Turns each {cluster_id, documents} pair into a playlist whose id and title
  # are the cluster id and whose videos are recovered from the document metadata.
  defp playlists_from_clustering_result(clustered_documents) do
    for {cluster_id, documents} <- clustered_documents do
      %YoutubePlaylist{
        metadata: %{id: cluster_id, title: cluster_id},
        videos: Enum.map(documents, & &1.metadata)
      }
    end
  end

  defp flatten_playlists(playlists), do: Enum.flat_map(playlists, & &1.videos)

  defp get_documents_from_playlists(playlists, text_field) do
    playlists
    |> flatten_playlists()
    |> Enum.map(&video_to_document(&1, text_field))
  end

  # Sets :representing_video on every clustered playlist.
  # NOTE(review): the i-th playlist is paired with the i-th row of the cluster
  # tensor; this presumes the clustering result is enumerated in cluster-index
  # order — confirm against EmbeddingClusterer.get_clustered_documents/2.
  defp add_closest_centroid_videos(clusterer, clustered_playlists, text_field) do
    clusters = clusterer.model.clusters
    # Kept as an assertion that the cluster tensor has exactly two dimensions.
    {_n_clusters, _} = clusters.shape
    embedder = clusterer.text_embedder

    for {playlist, i} <- Enum.with_index(clustered_playlists) do
      # A one-element range keeps the slice two-dimensional.
      cluster = clusters[i..i]
      documents = get_documents_from_playlists([playlist], text_field)

      closest_index = TextEmbedder.get_closest_index(embedder, cluster, documents)
      closest_video = Enum.at(documents, closest_index)

      # Struct update syntax: raises if the struct ever loses this key, instead
      # of silently adding a stray one as Map.put/3 would.
      %{playlist | representing_video: closest_video.metadata}
    end
  end
end
# Cluster all fetched videos by title; the flag asks for representing videos.
title_playlist_clusterer = playlists |> YTOrg.PlaylistClusterer.new(:title, true)
:ok

Playlists from title clusters

# Browse the title-based clusters with the same UI used for the real playlists.
clustered_interface =
  title_playlist_clusterer.clustered_playlists
  |> YoutubePlaylistInterface.new()

YoutubePlaylistInterface.display(clustered_interface)
:ok
YoutubePlaylistInterface.display(clustered_interface)

Playlists from title clusters

# `title_clustered_playlists` and `clusterer` were never bound in this notebook;
# take both from the clusterer struct built for the title clusters.
title_clustered_playlists = title_playlist_clusterer.clustered_playlists
clusterer = title_playlist_clusterer.clusterer

interface = YoutubePlaylistInterface.new(title_clustered_playlists)
:ok
clusterer.model

Playlists from description clusters

# `description_clustered_playlists` was never bound in this notebook; build it
# the same way as the title clusters, but from the video descriptions.
description_clustered_playlists =
  YTOrg.PlaylistClusterer.new(playlists, :description, true).clustered_playlists

interface = YoutubePlaylistInterface.new(description_clustered_playlists)
:ok