Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@
from langchain_community.document_loaders.gcs_file import (
GCSFileLoader,
)
from langchain_community.document_loaders.genius import (
GeniusLoader,
)
from langchain_community.document_loaders.geodataframe import (
GeoDataFrameLoader,
)
Expand Down Expand Up @@ -814,6 +817,7 @@ def __getattr__(name: str) -> Any:
"GlueCatalogLoader",
"GCSFileLoader",
"GeoDataFrameLoader",
"GeniusLoader",
"GitHubIssuesLoader",
"GitLoader",
"GitbookLoader",
Expand Down
55 changes: 55 additions & 0 deletions libs/community/langchain_community/document_loaders/genius.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from typing import Iterator, Optional

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class GeniusLoader(BaseLoader):
    """Load song lyrics using the Genius API.

    This loader utilizes the `lyricsgenius` Python package to fetch song lyrics
    and metadata. You need a Genius API token, which can be generated at
    https://genius.com/api-clients.

    At most one document is produced per loader: the single match returned by
    ``search_song`` for the configured query.
    """

    def __init__(self, search_query: str, api_token: Optional[str] = None) -> None:
        """Initialize with search query and API token.

        Args:
            search_query: The search query
                (e.g., "Imagine Dragons - Radioactive").
            api_token: Genius API token. It is handed to
                ``lyricsgenius.Genius`` unchanged; when ``None``, that client
                is expected to fall back to the ``GENIUS_ACCESS_TOKEN``
                environment variable.
        """
        self.search_query = search_query
        self.api_token = api_token

    def lazy_load(self) -> Iterator[Document]:
        """Search Genius for the query and yield the matching song, if any.

        Yields:
            A single ``Document`` whose ``page_content`` is the song lyrics and
            whose metadata holds ``source``, ``title``, ``artist``, ``album``,
            ``url`` and ``id``. Nothing is yielded when the search has no
            result.

        Raises:
            ImportError: If the optional ``lyricsgenius`` package is missing.
        """
        # Imported lazily so the dependency stays optional for the package.
        try:
            import lyricsgenius
        except ImportError as e:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "lyricsgenius package not found, please install it with "
                "`pip install lyricsgenius`"
            ) from e

        client = lyricsgenius.Genius(self.api_token)

        # Only the first (best) match for the query is used.
        song = client.search_song(self.search_query)
        if not song:
            # No match: behave as an empty iterator.
            return

        metadata = {
            "source": "Genius",
            "title": song.title,
            "artist": song.artist,
            # NOTE(review): whether the song object exposes `album`, and its
            # type, appears to depend on the lyricsgenius version -- confirm.
            # A missing attribute is recorded as None instead of raising.
            "album": getattr(song, "album", None),
            "url": song.url,
            "id": song.id,
        }

        yield Document(page_content=song.lyrics, metadata=metadata)
36 changes: 36 additions & 0 deletions test_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Manual smoke test for GeniusLoader. It exits with a non-zero status on any
# failure so that shells and CI can detect it.

# Step 1: import from the top-level package.
# If this works, the loader is exported by the package __init__.py.
try:
    from langchain_community.document_loaders import GeniusLoader
except ImportError as e:
    print("❌ STEP 1 FAILED: Could not import GeniusLoader.")
    print(e)
    # `exit()` is a helper injected by the `site` module and, called without
    # an argument, reports success; raise SystemExit with a failure code.
    raise SystemExit(1)
else:
    print("✅ STEP 1 PASSED: GeniusLoader was imported successfully!")

# Steps 2 and 3: construct the loader and run it.
# Note: without a real token an API error is expected, but NOT a crash.
try:
    print("attempting to initialize loader...")
    # Replace 'fake_token' with a real one if you want actual lyrics
    loader = GeniusLoader("Taylor Swift", api_token="fake_token")
    print("✅ STEP 2 PASSED: Loader initialized!")

    print("Attempting to load data...")
    docs = list(loader.lazy_load())

    if docs:
        print(f"✅ STEP 3 PASSED: Found song: {docs[0].metadata['title']}")
    else:
        print("⚠️ STEP 3: No docs found (Expected if token is fake).")

except Exception as e:
    # A failure caused by the API token is acceptable here: it shows the code
    # ran far enough to call the API. This is a heuristic match on the error
    # text, not a check of a real status code.
    message = str(e)
    if any(marker in message for marker in ("401", "403", "Token")):
        print(
            "✅ STEP 3 PASSED: Code ran! (API rejected the fake token, which is expected)."
        )
    else:
        print(f"❌ STEP 3 FAILED with unexpected error: {e}")
        raise SystemExit(1)