Added AI agent to manage metadata
This commit is contained in:
216
furumi-agent/src/ingest/normalize.rs
Normal file
216
furumi-agent/src/ingest/normalize.rs
Normal file
@@ -0,0 +1,216 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::db::{NormalizedFields, SimilarAlbum, SimilarArtist};
|
||||
use crate::web::AppState;
|
||||
|
||||
use super::metadata::RawMetadata;
|
||||
|
||||
/// Build the user message with all context and call Ollama for normalization.
|
||||
pub async fn normalize(
|
||||
state: &Arc<AppState>,
|
||||
raw: &RawMetadata,
|
||||
hints: &crate::db::PathHints,
|
||||
similar_artists: &[SimilarArtist],
|
||||
similar_albums: &[SimilarAlbum],
|
||||
) -> anyhow::Result<NormalizedFields> {
|
||||
let user_message = build_user_message(raw, hints, similar_artists, similar_albums);
|
||||
|
||||
let response = call_ollama(
|
||||
&state.config.ollama_url,
|
||||
&state.config.ollama_model,
|
||||
&state.system_prompt,
|
||||
&user_message,
|
||||
)
|
||||
.await?;
|
||||
|
||||
parse_response(&response)
|
||||
}
|
||||
|
||||
fn build_user_message(
|
||||
raw: &RawMetadata,
|
||||
hints: &crate::db::PathHints,
|
||||
similar_artists: &[SimilarArtist],
|
||||
similar_albums: &[SimilarAlbum],
|
||||
) -> String {
|
||||
let mut msg = String::from("## Raw metadata from file tags\n");
|
||||
|
||||
if let Some(v) = &raw.title {
|
||||
msg.push_str(&format!("Title: \"{}\"\n", v));
|
||||
}
|
||||
if let Some(v) = &raw.artist {
|
||||
msg.push_str(&format!("Artist: \"{}\"\n", v));
|
||||
}
|
||||
if let Some(v) = &raw.album {
|
||||
msg.push_str(&format!("Album: \"{}\"\n", v));
|
||||
}
|
||||
if let Some(v) = raw.year {
|
||||
msg.push_str(&format!("Year: {}\n", v));
|
||||
}
|
||||
if let Some(v) = raw.track_number {
|
||||
msg.push_str(&format!("Track number: {}\n", v));
|
||||
}
|
||||
if let Some(v) = &raw.genre {
|
||||
msg.push_str(&format!("Genre: \"{}\"\n", v));
|
||||
}
|
||||
|
||||
msg.push_str("\n## Hints from file path\n");
|
||||
if let Some(v) = &hints.artist {
|
||||
msg.push_str(&format!("Path artist: \"{}\"\n", v));
|
||||
}
|
||||
if let Some(v) = &hints.album {
|
||||
msg.push_str(&format!("Path album: \"{}\"\n", v));
|
||||
}
|
||||
if let Some(v) = hints.year {
|
||||
msg.push_str(&format!("Path year: {}\n", v));
|
||||
}
|
||||
if let Some(v) = hints.track_number {
|
||||
msg.push_str(&format!("Path track number: {}\n", v));
|
||||
}
|
||||
if let Some(v) = &hints.title {
|
||||
msg.push_str(&format!("Path title: \"{}\"\n", v));
|
||||
}
|
||||
|
||||
if !similar_artists.is_empty() {
|
||||
msg.push_str("\n## Existing artists in database (similar matches)\n");
|
||||
for a in similar_artists {
|
||||
msg.push_str(&format!("- \"{}\" (similarity: {:.2})\n", a.name, a.similarity));
|
||||
}
|
||||
}
|
||||
|
||||
if !similar_albums.is_empty() {
|
||||
msg.push_str("\n## Existing albums in database (similar matches)\n");
|
||||
for a in similar_albums {
|
||||
let year_str = a.year.map(|y| format!(", year: {}", y)).unwrap_or_default();
|
||||
msg.push_str(&format!("- \"{}\" (similarity: {:.2}{})\n", a.name, a.similarity, year_str));
|
||||
}
|
||||
}
|
||||
|
||||
msg
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct OllamaRequest {
|
||||
model: String,
|
||||
messages: Vec<OllamaMessage>,
|
||||
format: String,
|
||||
stream: bool,
|
||||
options: OllamaOptions,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct OllamaMessage {
|
||||
role: String,
|
||||
content: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct OllamaOptions {
|
||||
temperature: f64,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OllamaResponse {
|
||||
message: OllamaResponseMessage,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OllamaResponseMessage {
|
||||
content: String,
|
||||
}
|
||||
|
||||
async fn call_ollama(
|
||||
base_url: &str,
|
||||
model: &str,
|
||||
system_prompt: &str,
|
||||
user_message: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
.build()?;
|
||||
|
||||
let request = OllamaRequest {
|
||||
model: model.to_owned(),
|
||||
messages: vec![
|
||||
OllamaMessage {
|
||||
role: "system".to_owned(),
|
||||
content: system_prompt.to_owned(),
|
||||
},
|
||||
OllamaMessage {
|
||||
role: "user".to_owned(),
|
||||
content: user_message.to_owned(),
|
||||
},
|
||||
],
|
||||
format: "json".to_owned(),
|
||||
stream: false,
|
||||
options: OllamaOptions { temperature: 0.1 },
|
||||
};
|
||||
|
||||
let url = format!("{}/api/chat", base_url.trim_end_matches('/'));
|
||||
tracing::info!(%url, model, prompt_len = user_message.len(), "Calling Ollama API...");
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let resp = client.post(&url).json(&request).send().await?;
|
||||
let elapsed = start.elapsed();
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
tracing::error!(%status, body = &body[..body.len().min(500)], "Ollama API error");
|
||||
anyhow::bail!("Ollama returned {}: {}", status, body);
|
||||
}
|
||||
|
||||
let ollama_resp: OllamaResponse = resp.json().await?;
|
||||
tracing::info!(
|
||||
elapsed_ms = elapsed.as_millis() as u64,
|
||||
response_len = ollama_resp.message.content.len(),
|
||||
"Ollama response received"
|
||||
);
|
||||
tracing::debug!(raw_response = %ollama_resp.message.content, "LLM raw output");
|
||||
Ok(ollama_resp.message.content)
|
||||
}
|
||||
|
||||
/// Parse the LLM JSON response into NormalizedFields.
|
||||
/// Handles both clean JSON and JSON wrapped in markdown code fences.
|
||||
fn parse_response(response: &str) -> anyhow::Result<NormalizedFields> {
|
||||
let cleaned = response.trim();
|
||||
|
||||
// Strip markdown code fences if present
|
||||
let json_str = if cleaned.starts_with("```") {
|
||||
let start = cleaned.find('{').unwrap_or(0);
|
||||
let end = cleaned.rfind('}').map(|i| i + 1).unwrap_or(cleaned.len());
|
||||
&cleaned[start..end]
|
||||
} else {
|
||||
cleaned
|
||||
};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct LlmOutput {
|
||||
artist: Option<String>,
|
||||
album: Option<String>,
|
||||
title: Option<String>,
|
||||
year: Option<i32>,
|
||||
track_number: Option<i32>,
|
||||
genre: Option<String>,
|
||||
#[serde(default)]
|
||||
featured_artists: Vec<String>,
|
||||
confidence: Option<f64>,
|
||||
notes: Option<String>,
|
||||
}
|
||||
|
||||
let parsed: LlmOutput = serde_json::from_str(json_str)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to parse LLM response as JSON: {} — raw: {}", e, response))?;
|
||||
|
||||
Ok(NormalizedFields {
|
||||
title: parsed.title,
|
||||
artist: parsed.artist,
|
||||
album: parsed.album,
|
||||
year: parsed.year,
|
||||
track_number: parsed.track_number,
|
||||
genre: parsed.genre,
|
||||
featured_artists: parsed.featured_artists,
|
||||
confidence: parsed.confidence,
|
||||
notes: parsed.notes,
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user