Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 68 additions & 13 deletions src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use std::path::Path;
use std::time::Duration;

use crate::{
convert_recipe_with_config, convert_recipe_with_provider, fetch_recipe_with_timeout,
ocr::ocr_image_file, ImportError, Recipe,
convert_recipe_with_config_and_language, convert_recipe_with_provider_and_language,
fetch_recipe_with_timeout_and_language, ocr::ocr_image_file, ImportError, Recipe,
};

/// Represents the input source for a recipe
Expand Down Expand Up @@ -68,6 +68,7 @@ pub struct RecipeImporterBuilder {
timeout: Option<Duration>,
api_key: Option<String>,
model: Option<String>,
recipe_language: Option<String>,
}

impl RecipeImporterBuilder {
Expand Down Expand Up @@ -206,6 +207,17 @@ impl RecipeImporterBuilder {
self
}

/// Sets a hint for the language the recipe text is written in when using LLM parsing.
///
/// Leading and trailing whitespace is removed from `language`. When nothing
/// remains after trimming, the builder's current language setting is left
/// as it was. If not set, the existing prompts are used unchanged.
pub fn recipe_language(mut self, language: impl Into<String>) -> Self {
    let raw: String = language.into();
    let hint = raw.trim();
    // A blank hint carries no information, so it never overwrites the field.
    if hint.is_empty() {
        return self;
    }
    self.recipe_language = Some(hint.to_string());
    self
}

/// Build and execute the recipe import operation
///
/// # Returns
Expand Down Expand Up @@ -245,19 +257,40 @@ impl RecipeImporterBuilder {
match (source, self.mode) {
// Use Case 1: URL → Cooklang
(InputSource::Url(url), OutputMode::Cooklang) => {
let recipe = fetch_recipe_with_timeout(&url, self.timeout).await?;
let recipe = fetch_recipe_with_timeout_and_language(
&url,
self.timeout,
self.recipe_language.as_deref(),
)
.await?;
let cooklang = if self.api_key.is_some() || self.model.is_some() {
convert_recipe_with_config(&recipe, provider_name, self.api_key, self.model)
.await?
convert_recipe_with_config_and_language(
&recipe,
provider_name,
self.api_key,
self.model,
self.recipe_language.as_deref(),
)
.await?
} else {
convert_recipe_with_provider(&recipe, provider_name).await?
convert_recipe_with_provider_and_language(
&recipe,
provider_name,
self.recipe_language.as_deref(),
)
.await?
};
Ok(ImportResult::Cooklang(cooklang))
}

// Use Case 2: URL → Recipe (extract only)
(InputSource::Url(url), OutputMode::Recipe) => {
let recipe = fetch_recipe_with_timeout(&url, self.timeout).await?;
let recipe = fetch_recipe_with_timeout_and_language(
&url,
self.timeout,
self.recipe_language.as_deref(),
)
.await?;
Ok(ImportResult::Recipe(recipe))
}

Expand All @@ -277,10 +310,21 @@ impl RecipeImporterBuilder {
};

let cooklang = if self.api_key.is_some() || self.model.is_some() {
convert_recipe_with_config(&recipe, provider_name, self.api_key, self.model)
.await?
convert_recipe_with_config_and_language(
&recipe,
provider_name,
self.api_key,
self.model,
self.recipe_language.as_deref(),
)
.await?
} else {
convert_recipe_with_provider(&recipe, provider_name).await?
convert_recipe_with_provider_and_language(
&recipe,
provider_name,
self.recipe_language.as_deref(),
)
.await?
};
Ok(ImportResult::Cooklang(cooklang))
}
Expand Down Expand Up @@ -315,10 +359,21 @@ impl RecipeImporterBuilder {

// Convert to Cooklang
let cooklang = if self.api_key.is_some() || self.model.is_some() {
convert_recipe_with_config(&recipe, provider_name, self.api_key, self.model)
.await?
convert_recipe_with_config_and_language(
&recipe,
provider_name,
self.api_key,
self.model,
self.recipe_language.as_deref(),
)
.await?
} else {
convert_recipe_with_provider(&recipe, provider_name).await?
convert_recipe_with_provider_and_language(
&recipe,
provider_name,
self.recipe_language.as_deref(),
)
.await?
};
Ok(ImportResult::Cooklang(cooklang))
}
Expand Down
4 changes: 4 additions & 0 deletions src/extractors/json_ld.rs
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,7 @@ mod tests {
url: "http://example.com".to_string(),
document,
texts: None,
recipe_language: None,
};
let extractor = JsonLdExtractor;
// Just verify that parse returns an error for invalid input
Expand Down Expand Up @@ -821,6 +822,7 @@ mod tests {
url: "http://example.com".to_string(),
document,
texts: None,
recipe_language: None,
};

let result = extractor.parse(&context).unwrap();
Expand Down Expand Up @@ -894,6 +896,7 @@ mod tests {
url: "http://example.com".to_string(),
document,
texts: None,
recipe_language: None,
};

let result = extractor.parse(&context).unwrap();
Expand Down Expand Up @@ -940,6 +943,7 @@ mod tests {
url: "http://example.com".to_string(),
document,
texts: None,
recipe_language: None,
};

let result = extractor.parse(&context).unwrap();
Expand Down
1 change: 1 addition & 0 deletions src/extractors/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub struct ParsingContext {
pub url: String,
pub document: Html,
pub texts: Option<String>,
pub recipe_language: Option<String>,
}

pub trait Extractor {
Expand Down
26 changes: 23 additions & 3 deletions src/extractors/plain_text_llm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,23 @@ const MODEL: &str = "gpt-4o-mini";

pub struct PlainTextLlmExtractor;

/// Builds the system prompt sent to the LLM for plain-text extraction.
///
/// When `language` contains a non-blank value (after trimming whitespace), a
/// sentence naming that language is appended to `PROMPT` on a new line.
/// Otherwise `PROMPT` is returned unchanged.
fn build_plaintext_prompt(language: Option<&str>) -> String {
    // Treat a whitespace-only hint the same as no hint at all.
    let hint = language
        .map(str::trim)
        .filter(|candidate| !candidate.is_empty());

    hint.map_or_else(
        || PROMPT.to_string(),
        |lang| {
            format!(
                "{}\nThe text you receive is written in {lang}. Extract ingredients and instructions that are written in {lang} while keeping their wording in that language.",
                PROMPT
            )
        },
    )
}

#[async_trait::async_trait]
impl Extractor for PlainTextLlmExtractor {
fn parse(&self, context: &ParsingContext) -> Result<Recipe, Box<dyn std::error::Error>> {
Expand All @@ -45,8 +62,9 @@ impl Extractor for PlainTextLlmExtractor {
.map(|el| el.inner_html().trim().to_string())
.unwrap_or_else(|| "Untitled Recipe".to_string());

let prompt = build_plaintext_prompt(context.recipe_language.as_deref());
let json = tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(fetch_json(texts))
tokio::runtime::Handle::current().block_on(fetch_json(texts, &prompt))
})?;

if let Some(error) = json["error"].as_str() {
Expand Down Expand Up @@ -117,7 +135,7 @@ async fn fetch_inner_text(url: &str) -> Result<String, Box<dyn Error>> {
Ok(content.content)
}

async fn fetch_json(texts: String) -> Result<Value, Box<dyn Error>> {
async fn fetch_json(texts: String, prompt: &str) -> Result<Value, Box<dyn Error>> {
let api_key = std::env::var("OPENAI_API_KEY")?;

// For testing environment, return mock data
Expand All @@ -137,7 +155,7 @@ async fn fetch_json(texts: String) -> Result<Value, Box<dyn Error>> {
"messages": [
{
"role": "system",
"content": PROMPT
"content": prompt
},
{
"role": "user",
Expand Down Expand Up @@ -330,6 +348,7 @@ mod tests {
url: "http://example.com".to_string(),
document,
texts: None,
recipe_language: None,
};
let extractor = PlainTextLlmExtractor;
// Set up environment for test
Expand Down Expand Up @@ -361,6 +380,7 @@ mod tests {
url: "http://example.com".to_string(),
document,
texts: None,
recipe_language: None,
};
let extractor = PlainTextLlmExtractor;

Expand Down
45 changes: 43 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ pub async fn fetch_recipe(url: &str) -> Result<model::Recipe, ImportError> {
fetch_recipe_with_timeout(url, None).await
}

/// Fetches and extracts a recipe from a URL, forwarding a language hint.
///
/// Works like `fetch_recipe` (no timeout is supplied) but additionally passes
/// `recipe_language`, the language the recipe is written in, for LLM-based
/// extraction fallbacks.
pub async fn fetch_recipe_with_language(
    url: &str,
    recipe_language: Option<&str>,
) -> Result<model::Recipe, ImportError> {
    // This entry point never supplies a timeout override.
    let timeout: Option<std::time::Duration> = None;
    fetch_recipe_with_timeout_and_language(url, timeout, recipe_language).await
}

/// Fetches and extracts a recipe from a URL.
///
/// This function performs the following steps:
Expand All @@ -59,6 +70,15 @@ pub async fn fetch_recipe(url: &str) -> Result<model::Recipe, ImportError> {
pub async fn fetch_recipe_with_timeout(
    url: &str,
    timeout: Option<std::time::Duration>,
) -> Result<model::Recipe, ImportError> {
    // Delegate to the language-aware variant without a language hint.
    let recipe_language: Option<&str> = None;
    fetch_recipe_with_timeout_and_language(url, timeout, recipe_language).await
}

/// Fetches and extracts a recipe from a URL with an optional timeout and language hint.
pub async fn fetch_recipe_with_timeout_and_language(
url: &str,
timeout: Option<std::time::Duration>,
recipe_language: Option<&str>,
) -> Result<model::Recipe, ImportError> {
// Set up headers with a user agent
let mut headers = HeaderMap::new();
Expand All @@ -84,6 +104,7 @@ pub async fn fetch_recipe_with_timeout(
url: url.to_string(),
document: Html::parse_document(&body),
texts: None,
recipe_language: recipe_language.map(|lang| lang.to_string()),
};

let extractors_list: Vec<Box<dyn Extractor>> = vec![
Expand Down Expand Up @@ -155,6 +176,17 @@ pub async fn convert_recipe_with_config(
provider_name: Option<&str>,
api_key: Option<String>,
model: Option<String>,
) -> Result<String, ImportError> {
convert_recipe_with_config_and_language(recipe, provider_name, api_key, model, None).await
}

/// Converts a recipe to Cooklang format with explicit configuration and an optional language hint.
pub async fn convert_recipe_with_config_and_language(
recipe: &model::Recipe,
provider_name: Option<&str>,
api_key: Option<String>,
model: Option<String>,
recipe_language: Option<&str>,
) -> Result<String, ImportError> {
use crate::config::ProviderConfig;
use crate::providers::{AnthropicProvider, OpenAIProvider};
Expand Down Expand Up @@ -214,7 +246,7 @@ pub async fn convert_recipe_with_config(

// Convert using the provider
let mut cooklang_recipe = converter
.convert(&recipe.content)
.convert(&recipe.content, recipe_language)
.await
.map_err(|e| ImportError::ConversionError(e.to_string()))?;

Expand Down Expand Up @@ -246,6 +278,15 @@ pub async fn convert_recipe_with_config(
pub async fn convert_recipe_with_provider(
    recipe: &model::Recipe,
    provider_name: Option<&str>,
) -> Result<String, ImportError> {
    // Delegate to the language-aware variant without a language hint.
    let recipe_language: Option<&str> = None;
    convert_recipe_with_provider_and_language(recipe, provider_name, recipe_language).await
}

/// Converts a recipe to Cooklang format using a custom provider with an optional language hint.
pub async fn convert_recipe_with_provider_and_language(
recipe: &model::Recipe,
provider_name: Option<&str>,
recipe_language: Option<&str>,
) -> Result<String, ImportError> {
use crate::config::AiConfig;
use crate::providers::{OpenAIProvider, ProviderFactory};
Expand Down Expand Up @@ -314,7 +355,7 @@ pub async fn convert_recipe_with_provider(

// Convert using the provider
let mut cooklang_recipe = converter
.convert(&recipe.content)
.convert(&recipe.content, recipe_language)
.await
.map_err(|e| ImportError::ConversionError(e.to_string()))?;

Expand Down
Loading
Loading