use std::sync::Arc; use serde::{Deserialize, Serialize}; use crate::db::{NormalizedFields, SimilarAlbum, SimilarArtist}; use crate::web::AppState; use super::metadata::RawMetadata; #[derive(Debug)] pub struct FolderContext { pub folder_path: String, // path relative to inbox_dir (e.g. "Kunteynir/Синглы/Пьюк") pub folder_files: Vec, // audio filenames in the same folder pub track_count: usize, // number of audio files in folder } /// Build the user message with all context and call Ollama for normalization. pub async fn normalize( state: &Arc, raw: &RawMetadata, hints: &crate::db::PathHints, similar_artists: &[SimilarArtist], similar_albums: &[SimilarAlbum], folder_ctx: Option<&FolderContext>, ) -> anyhow::Result { let user_message = build_user_message(raw, hints, similar_artists, similar_albums, folder_ctx); let schema = normalize_schema(); let response = call_ollama( &state.config.ollama_url, &state.config.ollama_model, &state.system_prompt, &user_message, state.config.ollama_auth.as_deref(), 0.5, 512, Some(("normalized_metadata", schema.clone())), ) .await?; match parse_response(&response) { Ok(fields) => Ok(fields), Err(e) => { tracing::warn!(error = %e, "LLM parse failed, retrying with higher frequency_penalty"); let response2 = call_ollama( &state.config.ollama_url, &state.config.ollama_model, &state.system_prompt, &user_message, state.config.ollama_auth.as_deref(), 1.5, 512, Some(("normalized_metadata", schema)), ) .await?; parse_response(&response2) } } } fn build_user_message( raw: &RawMetadata, hints: &crate::db::PathHints, similar_artists: &[SimilarArtist], similar_albums: &[SimilarAlbum], folder_ctx: Option<&FolderContext>, ) -> String { let mut msg = String::from("## Raw metadata from file tags\n"); if let Some(v) = &raw.title { msg.push_str(&format!("Title: \"{}\"\n", v)); } if let Some(v) = &raw.artist { msg.push_str(&format!("Artist: \"{}\"\n", v)); } if let Some(v) = &raw.album { msg.push_str(&format!("Album: \"{}\"\n", v)); } if let Some(v) = 
raw.year { msg.push_str(&format!("Year: {}\n", v)); } if let Some(v) = raw.track_number { msg.push_str(&format!("Track number: {}\n", v)); } if let Some(v) = &raw.genre { msg.push_str(&format!("Genre: \"{}\"\n", v)); } msg.push_str("\n## Hints from file path\n"); if let Some(v) = &hints.artist { msg.push_str(&format!("Path artist: \"{}\"\n", v)); } if let Some(v) = &hints.album { msg.push_str(&format!("Path album: \"{}\"\n", v)); } if let Some(v) = hints.year { msg.push_str(&format!("Path year: {}\n", v)); } if let Some(v) = hints.track_number { msg.push_str(&format!("Path track number: {}\n", v)); } if let Some(v) = &hints.title { msg.push_str(&format!("Path title: \"{}\"\n", v)); } if !similar_artists.is_empty() { msg.push_str("\n## Existing artists in database (similar matches)\n"); for a in similar_artists { msg.push_str(&format!("- \"{}\" (similarity: {:.2})\n", a.name, a.similarity)); } } if !similar_albums.is_empty() { msg.push_str("\n## Existing albums in database (similar matches)\n"); for a in similar_albums { let year_str = a.year.map(|y| format!(", year: {}", y)).unwrap_or_default(); msg.push_str(&format!("- \"{}\" (similarity: {:.2}{})\n", a.name, a.similarity, year_str)); } } if let Some(ctx) = folder_ctx { msg.push_str("\n## Folder context\n"); msg.push_str(&format!("Folder path: \"{}\"\n", ctx.folder_path)); msg.push_str(&format!("Track count in folder: {}\n", ctx.track_count)); if !ctx.folder_files.is_empty() { msg.push_str("Files in folder:\n"); for f in &ctx.folder_files { msg.push_str(&format!(" - {}\n", f)); } } } msg } #[derive(Serialize)] struct ChatRequest { model: String, messages: Vec, #[serde(skip_serializing_if = "Option::is_none")] response_format: Option, stream: bool, temperature: f64, max_tokens: u32, frequency_penalty: f64, } #[derive(Serialize)] struct ChatMessage { role: String, content: String, } #[derive(Serialize)] struct ChatResponseFormat { #[serde(rename = "type")] kind: String, json_schema: JsonSchemaWrapper, } 
/// Named JSON Schema wrapper nested under `response_format.json_schema`.
#[derive(Serialize)]
struct JsonSchemaWrapper {
    name: String,
    /// Ask the backend to enforce the schema strictly.
    strict: bool,
    schema: serde_json::Value,
}

/// Minimal subset of the chat-completions response we care about.
#[derive(Deserialize)]
struct ChatResponse {
    choices: Vec<ChatChoice>,
}

#[derive(Deserialize)]
struct ChatChoice {
    message: ChatResponseMessage,
}

#[derive(Deserialize)]
struct ChatResponseMessage {
    content: String,
}

/// Call an OpenAI-compatible `/v1/chat/completions` endpoint and return the
/// first choice's message content.
///
/// * `auth` — optional raw `Authorization` header value, sent verbatim.
/// * `schema` — optional `(name, json_schema)` pair; when present the request
///   asks for strict structured output.
///
/// # Errors
/// Fails on transport errors, non-2xx responses (body included in the error),
/// or an empty `choices` array.
pub async fn call_ollama(
    base_url: &str,
    model: &str,
    system_prompt: &str,
    user_message: &str,
    auth: Option<&str>,
    frequency_penalty: f64,
    max_tokens: u32,
    schema: Option<(&str, serde_json::Value)>,
) -> anyhow::Result<String> {
    // Generous timeout: local models can take a long time on large prompts.
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(120))
        .build()?;

    let response_format = schema.map(|(name, schema)| ChatResponseFormat {
        kind: "json_schema".to_owned(),
        json_schema: JsonSchemaWrapper { name: name.to_owned(), strict: true, schema },
    });

    let request = ChatRequest {
        model: model.to_owned(),
        messages: vec![
            ChatMessage {
                role: "system".to_owned(),
                content: system_prompt.to_owned(),
            },
            ChatMessage {
                role: "user".to_owned(),
                content: user_message.to_owned(),
            },
        ],
        response_format,
        stream: false,
        temperature: 0.1,
        max_tokens,
        frequency_penalty,
    };

    let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
    tracing::info!(%url, model, prompt_len = user_message.len(), "Calling LLM API...");
    let start = std::time::Instant::now();

    let mut req = client.post(&url).json(&request);
    if let Some(auth_header) = auth {
        req = req.header("Authorization", auth_header);
    }
    let resp = req.send().await?;
    let elapsed = start.elapsed();

    if !resp.status().is_success() {
        let status = resp.status();
        let body = resp.text().await.unwrap_or_default();
        // Truncate the logged preview by characters, not bytes: a byte slice
        // at index 500 can land inside a multi-byte UTF-8 sequence and panic.
        let preview: String = body.chars().take(500).collect();
        tracing::error!(%status, body = %preview, "LLM API error");
        anyhow::bail!("LLM returned {}: {}", status, body);
    }

    let chat_resp: ChatResponse = resp.json().await?;
    let content = chat_resp
        .choices
        .into_iter()
        .next()
        .ok_or_else(|| anyhow::anyhow!("LLM returned empty choices"))?
        .message
        .content;

    tracing::info!(
        elapsed_ms = elapsed.as_millis() as u64,
        response_len = content.len(),
        "LLM response received"
    );
    tracing::debug!(raw_response = %content, "LLM raw output");
    Ok(content)
}

/// JSON Schema for the structured normalization output. Every property is
/// listed in `required` (with nullable types) so strict mode forces the model
/// to emit all keys explicitly.
fn normalize_schema() -> serde_json::Value {
    serde_json::json!({
        "type": "object",
        "properties": {
            "artist": { "type": ["string", "null"] },
            "album": { "type": ["string", "null"] },
            "title": { "type": ["string", "null"] },
            "year": { "type": ["integer", "null"] },
            "track_number": { "type": ["integer", "null"] },
            "genre": { "type": ["string", "null"] },
            "featured_artists": { "type": "array", "items": { "type": "string" } },
            "release_kind": { "type": ["string", "null"] },
            "confidence": { "type": ["number", "null"] },
            "notes": { "type": ["string", "null"] }
        },
        "required": ["artist", "album", "title", "year", "track_number", "genre", "featured_artists", "release_kind", "confidence", "notes"],
        "additionalProperties": false
    })
}

/// Parse the LLM JSON response into NormalizedFields.
/// Handles both clean JSON and JSON wrapped in markdown code fences.
fn parse_response(response: &str) -> anyhow::Result { let cleaned = response.trim(); // Strip markdown code fences if present let json_str = if cleaned.starts_with("```") { let start = cleaned.find('{').unwrap_or(0); let end = cleaned.rfind('}').map(|i| i + 1).unwrap_or(cleaned.len()); &cleaned[start..end] } else { cleaned }; #[derive(Deserialize)] struct LlmOutput { artist: Option, album: Option, title: Option, year: Option, track_number: Option, genre: Option, #[serde(default)] featured_artists: Vec, #[serde(rename = "release_kind")] release_type: Option, confidence: Option, notes: Option, } let parsed: LlmOutput = serde_json::from_str(json_str) .map_err(|e| anyhow::anyhow!("Failed to parse LLM response as JSON: {} — raw: {}", e, response))?; Ok(NormalizedFields { title: parsed.title, artist: parsed.artist, album: parsed.album, year: parsed.year, track_number: parsed.track_number, genre: parsed.genre, featured_artists: parsed.featured_artists, release_type: parsed.release_type, confidence: parsed.confidence, notes: parsed.notes, }) }