Files
furumi-ng/furumi-agent/src/ingest/normalize.rs
T

319 lines
9.8 KiB
Rust
Raw Normal View History

2026-03-18 02:21:00 +00:00
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use crate::db::{NormalizedFields, SimilarAlbum, SimilarArtist};
use crate::web::AppState;
use super::metadata::RawMetadata;
2026-03-19 13:24:48 +00:00
/// Information about the folder a track was found in, passed along so the
/// prompt can include a "Folder context" section.
#[derive(Debug)]
pub struct FolderContext {
/// Path relative to inbox_dir (e.g. "Kunteynir/Синглы/Пьюк").
pub folder_path: String,
/// Audio filenames in the same folder.
pub folder_files: Vec<String>,
/// Number of audio files in the folder.
pub track_count: usize,
}
2026-03-18 02:21:00 +00:00
/// Build the user message with all context and call Ollama for normalization.
pub async fn normalize(
state: &Arc<AppState>,
raw: &RawMetadata,
hints: &crate::db::PathHints,
similar_artists: &[SimilarArtist],
similar_albums: &[SimilarAlbum],
2026-03-19 13:24:48 +00:00
folder_ctx: Option<&FolderContext>,
2026-03-18 02:21:00 +00:00
) -> anyhow::Result<NormalizedFields> {
2026-03-19 13:24:48 +00:00
let user_message = build_user_message(raw, hints, similar_artists, similar_albums, folder_ctx);
2026-03-18 02:21:00 +00:00
let schema = normalize_schema();
2026-03-18 02:21:00 +00:00
let response = call_ollama(
&state.config.ollama_url,
&state.config.ollama_model,
&state.system_prompt,
&user_message,
2026-03-18 13:04:13 +00:00
state.config.ollama_auth.as_deref(),
0.5,
Some(("normalized_metadata", schema.clone())),
2026-03-18 02:21:00 +00:00
)
.await?;
match parse_response(&response) {
Ok(fields) => Ok(fields),
Err(e) => {
tracing::warn!(error = %e, "LLM parse failed, retrying with higher frequency_penalty");
let response2 = call_ollama(
&state.config.ollama_url,
&state.config.ollama_model,
&state.system_prompt,
&user_message,
state.config.ollama_auth.as_deref(),
1.5,
Some(("normalized_metadata", schema)),
)
.await?;
parse_response(&response2)
}
}
2026-03-18 02:21:00 +00:00
}
fn build_user_message(
raw: &RawMetadata,
hints: &crate::db::PathHints,
similar_artists: &[SimilarArtist],
similar_albums: &[SimilarAlbum],
2026-03-19 13:24:48 +00:00
folder_ctx: Option<&FolderContext>,
2026-03-18 02:21:00 +00:00
) -> String {
let mut msg = String::from("## Raw metadata from file tags\n");
if let Some(v) = &raw.title {
msg.push_str(&format!("Title: \"{}\"\n", v));
}
if let Some(v) = &raw.artist {
msg.push_str(&format!("Artist: \"{}\"\n", v));
}
if let Some(v) = &raw.album {
msg.push_str(&format!("Album: \"{}\"\n", v));
}
if let Some(v) = raw.year {
msg.push_str(&format!("Year: {}\n", v));
}
if let Some(v) = raw.track_number {
msg.push_str(&format!("Track number: {}\n", v));
}
if let Some(v) = &raw.genre {
msg.push_str(&format!("Genre: \"{}\"\n", v));
}
msg.push_str("\n## Hints from file path\n");
if let Some(v) = &hints.artist {
msg.push_str(&format!("Path artist: \"{}\"\n", v));
}
if let Some(v) = &hints.album {
msg.push_str(&format!("Path album: \"{}\"\n", v));
}
if let Some(v) = hints.year {
msg.push_str(&format!("Path year: {}\n", v));
}
if let Some(v) = hints.track_number {
msg.push_str(&format!("Path track number: {}\n", v));
}
if let Some(v) = &hints.title {
msg.push_str(&format!("Path title: \"{}\"\n", v));
}
if !similar_artists.is_empty() {
msg.push_str("\n## Existing artists in database (similar matches)\n");
for a in similar_artists {
msg.push_str(&format!("- \"{}\" (similarity: {:.2})\n", a.name, a.similarity));
}
}
if !similar_albums.is_empty() {
msg.push_str("\n## Existing albums in database (similar matches)\n");
for a in similar_albums {
let year_str = a.year.map(|y| format!(", year: {}", y)).unwrap_or_default();
msg.push_str(&format!("- \"{}\" (similarity: {:.2}{})\n", a.name, a.similarity, year_str));
}
}
2026-03-19 13:24:48 +00:00
if let Some(ctx) = folder_ctx {
msg.push_str("\n## Folder context\n");
msg.push_str(&format!("Folder path: \"{}\"\n", ctx.folder_path));
msg.push_str(&format!("Track count in folder: {}\n", ctx.track_count));
if !ctx.folder_files.is_empty() {
msg.push_str("Files in folder:\n");
for f in &ctx.folder_files {
msg.push_str(&format!(" - {}\n", f));
}
}
}
2026-03-18 02:21:00 +00:00
msg
}
#[derive(Serialize)]
2026-04-07 19:32:17 +01:00
struct ChatRequest {
2026-03-18 02:21:00 +00:00
model: String,
2026-04-07 19:32:17 +01:00
messages: Vec<ChatMessage>,
#[serde(skip_serializing_if = "Option::is_none")]
response_format: Option<ChatResponseFormat>,
2026-03-18 02:21:00 +00:00
stream: bool,
2026-04-07 19:32:17 +01:00
temperature: f64,
max_tokens: u32,
frequency_penalty: f64,
2026-03-18 02:21:00 +00:00
}
#[derive(Serialize)]
2026-04-07 19:32:17 +01:00
struct ChatMessage {
2026-03-18 02:21:00 +00:00
role: String,
content: String,
}
#[derive(Serialize)]
2026-04-07 19:32:17 +01:00
struct ChatResponseFormat {
#[serde(rename = "type")]
kind: String,
2026-04-07 19:52:03 +01:00
json_schema: JsonSchemaWrapper,
}
/// A JSON schema together with its name and strictness flag, as embedded in
/// `ChatResponseFormat`.
#[derive(Serialize)]
struct JsonSchemaWrapper {
    /// Name identifying the schema.
    name: String,
    /// Strictness flag forwarded to the endpoint (`call_ollama` always sends `true`).
    strict: bool,
    /// The schema document itself.
    schema: serde_json::Value,
}
#[derive(Deserialize)]
2026-04-07 19:32:17 +01:00
struct ChatResponse {
choices: Vec<ChatChoice>,
2026-03-18 02:21:00 +00:00
}
#[derive(Deserialize)]
2026-04-07 19:32:17 +01:00
struct ChatChoice {
message: ChatResponseMessage,
}
#[derive(Deserialize)]
struct ChatResponseMessage {
2026-03-18 02:21:00 +00:00
content: String,
}
2026-03-19 00:55:49 +00:00
pub async fn call_ollama(
2026-03-18 02:21:00 +00:00
base_url: &str,
model: &str,
system_prompt: &str,
user_message: &str,
2026-03-18 13:04:13 +00:00
auth: Option<&str>,
frequency_penalty: f64,
schema: Option<(&str, serde_json::Value)>,
2026-03-18 02:21:00 +00:00
) -> anyhow::Result<String> {
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(120))
.build()?;
let response_format = schema.map(|(name, schema)| ChatResponseFormat {
kind: "json_schema".to_owned(),
json_schema: JsonSchemaWrapper { name: name.to_owned(), strict: true, schema },
});
2026-04-07 19:32:17 +01:00
let request = ChatRequest {
2026-03-18 02:21:00 +00:00
model: model.to_owned(),
messages: vec![
2026-04-07 19:32:17 +01:00
ChatMessage {
2026-03-18 02:21:00 +00:00
role: "system".to_owned(),
content: system_prompt.to_owned(),
},
2026-04-07 19:32:17 +01:00
ChatMessage {
2026-03-18 02:21:00 +00:00
role: "user".to_owned(),
content: user_message.to_owned(),
},
],
response_format,
2026-03-18 02:21:00 +00:00
stream: false,
2026-04-07 19:32:17 +01:00
temperature: 0.1,
max_tokens: 512,
frequency_penalty,
2026-03-18 02:21:00 +00:00
};
2026-04-07 19:32:17 +01:00
let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
tracing::info!(%url, model, prompt_len = user_message.len(), "Calling LLM API...");
2026-03-18 02:21:00 +00:00
let start = std::time::Instant::now();
2026-03-18 13:04:13 +00:00
let mut req = client.post(&url).json(&request);
if let Some(auth_header) = auth {
req = req.header("Authorization", auth_header);
}
let resp = req.send().await?;
2026-03-18 02:21:00 +00:00
let elapsed = start.elapsed();
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
2026-04-07 19:32:17 +01:00
tracing::error!(%status, body = &body[..body.len().min(500)], "LLM API error");
anyhow::bail!("LLM returned {}: {}", status, body);
}
let chat_resp: ChatResponse = resp.json().await?;
let content = chat_resp
.choices
.into_iter()
.next()
.ok_or_else(|| anyhow::anyhow!("LLM returned empty choices"))?
.message
.content;
2026-03-18 02:21:00 +00:00
tracing::info!(
elapsed_ms = elapsed.as_millis() as u64,
2026-04-07 19:32:17 +01:00
response_len = content.len(),
"LLM response received"
2026-03-18 02:21:00 +00:00
);
2026-04-07 19:32:17 +01:00
tracing::debug!(raw_response = %content, "LLM raw output");
Ok(content)
2026-03-18 02:21:00 +00:00
}
/// JSON schema describing the object the LLM is asked to return.
///
/// Every property is listed as required and no additional properties are
/// allowed. All properties except `featured_artists` (an array of strings)
/// may be `null`.
fn normalize_schema() -> serde_json::Value {
    // `{ "type": [<ty>, "null"] }` - a value of the given type, or null.
    let nullable = |ty: &str| serde_json::json!({ "type": [ty, "null"] });

    serde_json::json!({
        "type": "object",
        "properties": {
            "artist": nullable("string"),
            "album": nullable("string"),
            "title": nullable("string"),
            "year": nullable("integer"),
            "track_number": nullable("integer"),
            "genre": nullable("string"),
            "featured_artists": { "type": "array", "items": { "type": "string" } },
            "release_kind": nullable("string"),
            "confidence": nullable("number"),
            "notes": nullable("string")
        },
        "required": [
            "artist",
            "album",
            "title",
            "year",
            "track_number",
            "genre",
            "featured_artists",
            "release_kind",
            "confidence",
            "notes"
        ],
        "additionalProperties": false
    })
}
2026-03-18 02:21:00 +00:00
/// Parse the LLM JSON response into NormalizedFields.
///
/// Handles both clean JSON and JSON wrapped in markdown code fences: when the
/// trimmed text starts with a fence, only the span from the first `{` to the
/// last `}` is parsed. The JSON key `release_kind` is mapped onto the
/// `release_type` field, and a missing `featured_artists` key yields an empty
/// list.
///
/// # Errors
/// Returns an error carrying the parser message and the raw response when
/// the text is not valid JSON of the expected shape.
fn parse_response(response: &str) -> anyhow::Result<NormalizedFields> {
    #[derive(Deserialize)]
    struct LlmOutput {
        artist: Option<String>,
        album: Option<String>,
        title: Option<String>,
        year: Option<i32>,
        track_number: Option<i32>,
        genre: Option<String>,
        #[serde(default)]
        featured_artists: Vec<String>,
        #[serde(rename = "release_kind")]
        release_type: Option<String>,
        confidence: Option<f64>,
        notes: Option<String>,
    }

    let cleaned = response.trim();

    // Strip markdown code fences if present.
    let json_str = if cleaned.starts_with("```") {
        let start = cleaned.find('{').unwrap_or(0);
        let end = cleaned.rfind('}').map_or(cleaned.len(), |i| i + 1);
        // If the last `}` sits before the first `{`, the range is inverted and
        // direct slicing would panic. Fall back to the whole text so the JSON
        // parser reports an ordinary error instead.
        cleaned.get(start..end).unwrap_or(cleaned)
    } else {
        cleaned
    };

    let parsed: LlmOutput = serde_json::from_str(json_str).map_err(|e| {
        anyhow::anyhow!("Failed to parse LLM response as JSON: {} — raw: {}", e, response)
    })?;

    Ok(NormalizedFields {
        title: parsed.title,
        artist: parsed.artist,
        album: parsed.album,
        year: parsed.year,
        track_number: parsed.track_number,
        genre: parsed.genre,
        featured_artists: parsed.featured_artists,
        release_type: parsed.release_type,
        confidence: parsed.confidence,
        notes: parsed.notes,
    })
}