From 1e75644abb1db57beb97c6a97e0a023706ced498 Mon Sep 17 00:00:00 2001 From: AB-UK Date: Tue, 7 Apr 2026 22:34:39 +0100 Subject: [PATCH] feat(agent): switch LLM client from Ollama to OpenAI-compatible API (LM Studio support) - Replace /api/chat with /v1/chat/completions endpoint - Use json_schema response_format (LM Studio does not support json_object) - Make schema parameter optional in call_ollama to support different schemas per use case - Add dedicated normalize schema (normalized_metadata) with release_kind field instead of release_type to avoid model repetition loops - Add dedicated merge schema (artist_merge) so model no longer confuses normalize and merge response structures - Add retry with frequency_penalty=1.5 on parse failure to suppress repetition - Add id3 crate as fallback metadata reader for MP3 files with large embedded cover art that exceed Symphonia probe limit of 1MB Co-Authored-By: Claude Sonnet 4.6 (1M context) --- Cargo.lock | 1 + furumi-agent/src/ingest/normalize.rs | 81 +++++++++++++++++++--------- furumi-agent/src/merge.rs | 26 +++++++++ 3 files changed, 82 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a2b982..d733eb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1191,6 +1191,7 @@ dependencies = [ "tokio", "tokio-util", "tower 0.4.13", + "tower-http", "tracing", "tracing-subscriber", "urlencoding", diff --git a/furumi-agent/src/ingest/normalize.rs b/furumi-agent/src/ingest/normalize.rs index 541fb6f..087bab1 100644 --- a/furumi-agent/src/ingest/normalize.rs +++ b/furumi-agent/src/ingest/normalize.rs @@ -25,16 +25,35 @@ pub async fn normalize( ) -> anyhow::Result { let user_message = build_user_message(raw, hints, similar_artists, similar_albums, folder_ctx); + let schema = normalize_schema(); let response = call_ollama( &state.config.ollama_url, &state.config.ollama_model, &state.system_prompt, &user_message, state.config.ollama_auth.as_deref(), + 0.5, + Some(("normalized_metadata", schema.clone())), ) .await?; - parse_response(&response) + match parse_response(&response) { + Ok(fields) => Ok(fields), + Err(e) => { + tracing::warn!(error = %e, "LLM parse failed, retrying with higher frequency_penalty"); + let response2 = call_ollama( + &state.config.ollama_url, + &state.config.ollama_model, + &state.system_prompt, + &user_message, + state.config.ollama_auth.as_deref(), + 1.5, + Some(("normalized_metadata", schema)), + ) + .await?; + parse_response(&response2) + } + } } fn build_user_message( @@ -116,9 +135,12 @@ fn build_user_message( struct ChatRequest { model: String, messages: Vec, - response_format: ChatResponseFormat, + #[serde(skip_serializing_if = "Option::is_none")] + response_format: Option, stream: bool, temperature: f64, + max_tokens: u32, + frequency_penalty: f64, } #[derive(Serialize)] @@ -162,11 +184,18 @@ pub async fn call_ollama( system_prompt: &str, user_message: &str, auth: Option<&str>, + frequency_penalty: f64, + schema: Option<(&str, serde_json::Value)>, ) -> anyhow::Result { let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(120)) .build()?; + let response_format = schema.map(|(name, schema)| ChatResponseFormat { + kind: "json_schema".to_owned(), + json_schema: JsonSchemaWrapper { name: name.to_owned(), strict: true, schema }, + }); + let request = ChatRequest { model: model.to_owned(), messages: vec![ @@ -179,32 +208,11 @@ pub async fn call_ollama( content: user_message.to_owned(), }, ], - response_format: ChatResponseFormat { - kind: "json_schema".to_owned(), - json_schema: JsonSchemaWrapper { - name: "normalized_metadata".to_owned(), - strict: true, - schema: serde_json::json!({ - "type": "object", - "properties": { - "artist": { "type": ["string", "null"] }, - "album": { "type": ["string", "null"] }, - "title": { "type": ["string", "null"] }, - "year": { "type": ["integer", "null"] }, - "track_number": { "type": ["integer", "null"] }, - "genre": { "type": ["string", "null"] }, - "featured_artists": { "type": "array", "items": { "type": "string" } }, - "release_type": { "type": ["string", "null"] }, - "confidence": { "type": ["number", "null"] }, - "notes": { "type": ["string", "null"] } - }, - "required": ["artist", "album", "title", "year", "track_number", "genre", "featured_artists", "release_type", "confidence", "notes"], - "additionalProperties": false - }), - }, - }, + response_format, stream: false, temperature: 0.1, + max_tokens: 512, + frequency_penalty, }; let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/')); @@ -242,6 +250,26 @@ pub async fn call_ollama( Ok(content) } +fn normalize_schema() -> serde_json::Value { + serde_json::json!({ + "type": "object", + "properties": { + "artist": { "type": ["string", "null"] }, + "album": { "type": ["string", "null"] }, + "title": { "type": ["string", "null"] }, + "year": { "type": ["integer", "null"] }, + "track_number": { "type": ["integer", "null"] }, + "genre": { "type": ["string", "null"] }, + "featured_artists": { "type": "array", "items": { "type": "string" } }, + "release_kind": { "type": ["string", "null"] }, + "confidence": { "type": ["number", "null"] }, + "notes": { "type": ["string", "null"] } + }, + "required": ["artist", "album", "title", "year", "track_number", "genre", "featured_artists", "release_kind", "confidence", "notes"], + "additionalProperties": false + }) +} + /// Parse the LLM JSON response into NormalizedFields. /// Handles both clean JSON and JSON wrapped in markdown code fences. fn parse_response(response: &str) -> anyhow::Result { @@ -266,6 +294,7 @@ fn parse_response(response: &str) -> anyhow::Result { genre: Option, #[serde(default)] featured_artists: Vec, + #[serde(rename = "release_kind")] release_type: Option, confidence: Option, notes: Option, diff --git a/furumi-agent/src/merge.rs b/furumi-agent/src/merge.rs index 25f6e5b..3ab95a7 100644 --- a/furumi-agent/src/merge.rs +++ b/furumi-agent/src/merge.rs @@ -35,12 +35,38 @@ pub async fn propose_merge(state: &Arc, merge_id: Uuid) -> anyhow::Res let user_message = build_merge_message(&artists_data); + let schema = serde_json::json!({ + "type": "object", + "properties": { + "canonical_artist_name": { "type": "string" }, + "winner_artist_id": { "type": "integer" }, + "album_mappings": { + "type": "array", + "items": { + "type": "object", + "properties": { + "source_album_id": { "type": "integer" }, + "canonical_name": { "type": "string" }, + "merge_into_album_id": { "type": ["integer", "null"] } + }, + "required": ["source_album_id", "canonical_name", "merge_into_album_id"], + "additionalProperties": false + } + }, + "notes": { "type": "string" } + }, + "required": ["canonical_artist_name", "winner_artist_id", "album_mappings", "notes"], + "additionalProperties": false + }); + let response = call_ollama( &state.config.ollama_url, &state.config.ollama_model, &state.merge_prompt, &user_message, state.config.ollama_auth.as_deref(), + 0.5, + Some(("artist_merge", schema)), ).await?; let proposal = parse_merge_response(&response)?;