Added AI agent to manage metadata

This commit is contained in:
2026-03-18 02:21:00 +00:00
parent 8a49a5013b
commit d5068aaa33
17 changed files with 3384 additions and 1 deletions
+129
View File
@@ -0,0 +1,129 @@
use std::path::Path;
use symphonia::core::{
codecs::CODEC_TYPE_NULL,
formats::FormatOptions,
io::MediaSourceStream,
meta::{MetadataOptions, StandardTagKey},
probe::Hint,
};
/// Tag and stream information read from a single audio file.
///
/// All fields default to `None`; `extract` fills in whatever it can find.
#[derive(Debug, Default)]
pub struct RawMetadata {
/// Track title, from the `TrackTitle` tag.
pub title: Option<String>,
/// Artist name, from the `Artist` or `Performer` tag.
pub artist: Option<String>,
/// Album name, from the `Album` tag.
pub album: Option<String>,
/// Track number parsed from the `TrackNumber` tag.
pub track_number: Option<u32>,
/// Year parsed from the start of the `Date` or `OriginalDate` tag.
pub year: Option<u32>,
/// Genre, from the `Genre` tag.
pub genre: Option<String>,
/// Duration in seconds, computed from the track's frame count and time base.
pub duration_secs: Option<f64>,
}
/// Extract metadata from an audio file using Symphonia.
///
/// Must be called from a blocking context (`spawn_blocking`): the file is
/// opened and probed with synchronous I/O.
///
/// Tags are merged from two sources, in priority order:
/// 1. metadata found while probing (e.g. ID3 tags that precede the container),
/// 2. metadata embedded in the container format itself.
///
/// A field set by the first source is never overwritten by the second, but
/// fields the first source left empty are filled from the second.
///
/// # Errors
/// Returns an error if the file cannot be opened or if Symphonia fails to
/// probe it.
pub fn extract(path: &Path) -> anyhow::Result<RawMetadata> {
    let file = std::fs::File::open(path)?;
    let mss = MediaSourceStream::new(Box::new(file), Default::default());

    // Pass the file extension to the probe as a hint when it is valid UTF-8.
    let mut hint = Hint::new();
    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
        hint.with_extension(ext);
    }

    let mut probed = symphonia::default::get_probe().format(
        &hint,
        mss,
        &FormatOptions { enable_gapless: false, ..Default::default() },
        &MetadataOptions::default(),
    )?;

    let mut meta = RawMetadata::default();

    // Check metadata side-data (e.g., ID3 tags probed before format)
    if let Some(rev) = probed.metadata.get().as_ref().and_then(|m| m.current()) {
        extract_tags(rev.tags(), &mut meta);
    }

    // Also check format-embedded metadata. `extract_tags` only fills fields
    // that are still empty, so this is merged unconditionally: gating it on a
    // missing title would drop album/artist/year found only in this source.
    if let Some(rev) = probed.format.metadata().current() {
        extract_tags(rev.tags(), &mut meta);
    }

    // Duration: frame count of the first decodable track scaled by its time base.
    meta.duration_secs = probed
        .format
        .tracks()
        .iter()
        .find(|t| t.codec_params.codec != CODEC_TYPE_NULL)
        .and_then(|t| {
            let n_frames = t.codec_params.n_frames?;
            let tb = t.codec_params.time_base?;
            Some(n_frames as f64 * tb.numer as f64 / tb.denom as f64)
        });

    Ok(meta)
}
/// Copy the recognised standard tags from `tags` into `meta`.
///
/// Only fields that are still `None` are written, so the first value seen for
/// a field wins and this function can be called for several tag sources in
/// priority order. Tags without a standard key are ignored. Every value is
/// passed through `fix_encoding` first.
///
/// * Track numbers accept both `"3"` and the `"3/12"` (number/total) form.
/// * The year is parsed from the first four characters of the date value
///   (e.g. `"2020-05-01"` gives 2020).
fn extract_tags(tags: &[symphonia::core::meta::Tag], meta: &mut RawMetadata) {
    for tag in tags {
        let key = match tag.std_key {
            Some(key) => key,
            None => continue,
        };
        let value = fix_encoding(tag.value.to_string());

        match key {
            StandardTagKey::TrackTitle => {
                if meta.title.is_none() {
                    meta.title = Some(value);
                }
            }
            StandardTagKey::Artist | StandardTagKey::Performer => {
                if meta.artist.is_none() {
                    meta.artist = Some(value);
                }
            }
            StandardTagKey::Album => {
                if meta.album.is_none() {
                    meta.album = Some(value);
                }
            }
            StandardTagKey::TrackNumber => {
                if meta.track_number.is_none() {
                    // Tags commonly store "number/total"; keep only the number.
                    let number = value.split('/').next().unwrap_or("");
                    meta.track_number = number.trim().parse().ok();
                }
            }
            StandardTagKey::Date | StandardTagKey::OriginalDate => {
                if meta.year.is_none() {
                    // Take the first four *characters*. Slicing by byte offset
                    // would panic when byte 4 is inside a multi-byte character.
                    let prefix = value
                        .char_indices()
                        .nth(4)
                        .map_or(value.as_str(), |(idx, _)| &value[..idx]);
                    meta.year = prefix.parse().ok();
                }
            }
            StandardTagKey::Genre => {
                if meta.genre.is_none() {
                    meta.genre = Some(value);
                }
            }
            _ => {}
        }
    }
}
/// Heuristic repair for mojibake: CP1251 bytes that were decoded as
/// Latin-1/Windows-1252.
///
/// The input is returned unchanged when any character does not fit in a single
/// byte, when no byte is `0xC0` or above, or when decoding the bytes as
/// Windows-1251 reports errors. Otherwise the re-decoded text is returned.
fn fix_encoding(s: String) -> String {
    // Map every char back to one byte; give up as soon as one does not fit.
    let raw: Option<Vec<u8>> = s
        .chars()
        .map(|ch| {
            let code = ch as u32;
            if code <= 255 {
                Some(code as u8)
            } else {
                None
            }
        })
        .collect();
    let raw = match raw {
        Some(bytes) => bytes,
        None => return s,
    };

    // Without any high byte (>= 0xC0) there is nothing to repair.
    if raw.iter().all(|&b| b < 0xC0) {
        return s;
    }

    match encoding_rs::WINDOWS_1251.decode(&raw) {
        (_, _, true) => s,
        (text, _, false) => text.into_owned(),
    }
}
+518
View File
@@ -0,0 +1,518 @@
pub mod metadata;
pub mod normalize;
pub mod path_hints;
pub mod mover;
use std::sync::Arc;
use std::time::Duration;
use crate::db;
use crate::web::AppState;
/// Ingest loop: scan the inbox, log the outcome, sleep for the configured poll
/// interval, and repeat forever. Scan errors are logged and do not stop the loop.
pub async fn run(state: Arc<AppState>) {
    let poll_secs = state.config.poll_interval_secs;
    tracing::info!("Ingest loop started, polling every {}s: {:?}", poll_secs, state.config.inbox_dir);
    let interval = Duration::from_secs(poll_secs);

    loop {
        match scan_inbox(&state).await {
            Ok(count) if count > 0 => tracing::info!(count, "processed new files"),
            Ok(_) => {}
            Err(e) => tracing::error!(?e, "inbox scan failed"),
        }
        tokio::time::sleep(interval).await;
    }
}
/// Run one pass over the inbox directory.
///
/// Audio files are handled first and cover images afterwards, so that albums
/// already exist in the DB when covers are matched. Per-file failures are
/// logged and do not abort the pass. Returns how many files were processed.
async fn scan_inbox(state: &Arc<AppState>) -> anyhow::Result<usize> {
    let (mut tracks, mut covers) = (Vec::new(), Vec::new());
    collect_files(&state.config.inbox_dir, &mut tracks, &mut covers).await?;

    if !(tracks.is_empty() && covers.is_empty()) {
        tracing::info!("Scan found {} audio file(s) and {} image(s) in inbox", tracks.len(), covers.len());
    }

    let mut processed: usize = 0;

    for file_path in tracks.iter() {
        match process_file(state, file_path).await {
            Ok(true) => processed += 1,
            Ok(false) => tracing::debug!(path = ?file_path, "skipped (already known)"),
            Err(e) => tracing::warn!(?e, path = ?file_path, "failed to process file"),
        }
    }

    // Process cover images after audio (so albums exist in DB)
    for image_path in covers.iter() {
        match process_cover_image(state, image_path).await {
            Ok(true) => {
                tracing::info!(path = ?image_path, "Cover image processed");
                processed += 1;
            }
            Ok(false) => tracing::debug!(path = ?image_path, "cover image skipped"),
            Err(e) => tracing::warn!(?e, path = ?image_path, "failed to process cover image"),
        }
    }

    Ok(processed)
}
/// Walk `dir` recursively, appending audio files to `audio` and cover images
/// to `images`.
///
/// Entries whose name starts with `.` are skipped (files and directories
/// alike). Any I/O error aborts the walk and is returned.
async fn collect_files(dir: &std::path::Path, audio: &mut Vec<std::path::PathBuf>, images: &mut Vec<std::path::PathBuf>) -> anyhow::Result<()> {
    let mut reader = tokio::fs::read_dir(dir).await?;

    loop {
        let entry = match reader.next_entry().await? {
            Some(entry) => entry,
            None => break,
        };

        let name = entry.file_name().to_string_lossy().into_owned();
        if name.starts_with('.') {
            continue;
        }

        let kind = entry.file_type().await?;
        if kind.is_dir() {
            // The recursive future is boxed so the async fn can call itself.
            Box::pin(collect_files(&entry.path(), audio, images)).await?;
            continue;
        }
        if !kind.is_file() {
            continue;
        }

        if is_audio_file(&name) {
            audio.push(entry.path());
        } else if is_cover_image(&name) {
            images.push(entry.path());
        }
    }

    Ok(())
}
/// Return `true` when `name` has one of the supported audio file extensions
/// (compared case-insensitively).
///
/// A name without an extension is never audio: a file literally called `mp3`
/// or `flac` must not be mistaken for an audio file.
fn is_audio_file(name: &str) -> bool {
    let ext = match std::path::Path::new(name).extension().and_then(|e| e.to_str()) {
        Some(ext) => ext.to_ascii_lowercase(),
        None => return false,
    };
    matches!(
        ext.as_str(),
        "mp3" | "flac" | "ogg" | "opus" | "aac" | "m4a" | "wav" | "ape" | "wv" | "wma" | "tta" | "aiff" | "aif"
    )
}
/// Return `true` when `name` looks like album artwork: an image extension
/// combined with one of the well-known artwork file stems (both compared in
/// lower case).
fn is_cover_image(name: &str) -> bool {
    const IMAGE_EXTS: &[&str] = &["jpg", "jpeg", "png", "webp", "bmp", "gif"];
    const ARTWORK_STEMS: &[&str] = &[
        "cover", "front", "folder", "back", "booklet", "inlay", "disc", "cd",
        "album", "artwork", "art", "scan", "thumb", "thumbnail",
    ];

    // Text after the last '.', or the whole name when there is no dot.
    let ext = name.rfind('.').map_or(name, |dot| &name[dot + 1..]).to_lowercase();
    if !IMAGE_EXTS.contains(&ext.as_str()) {
        return false;
    }

    let stem = std::path::Path::new(name)
        .file_stem()
        .and_then(|s| s.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();
    ARTWORK_STEMS.contains(&stem.as_str())
}
/// Map an artwork file name to an image type label based on its lower-cased
/// stem: `"back"`, `"booklet"`, `"disc"`, or `"cover"` for everything else.
fn classify_image(name: &str) -> &'static str {
    let stem = std::path::Path::new(name)
        .file_stem()
        .and_then(|s| s.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();

    if stem == "back" {
        "back"
    } else if matches!(stem.as_str(), "booklet" | "inlay" | "scan") {
        "booklet"
    } else if matches!(stem.as_str(), "disc" | "cd") {
        "disc"
    } else {
        "cover"
    }
}
/// Return the MIME type for an image file name based on the text after its
/// last `.` (lower-cased); unknown extensions map to
/// `application/octet-stream`.
fn mime_for_image(name: &str) -> &'static str {
    const MIME_BY_EXT: &[(&str, &str)] = &[
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("png", "image/png"),
        ("webp", "image/webp"),
        ("gif", "image/gif"),
        ("bmp", "image/bmp"),
    ];

    // Text after the last '.', or the whole name when there is no dot.
    let ext = name.rfind('.').map_or(name, |dot| &name[dot + 1..]).to_lowercase();

    MIME_BY_EXT
        .iter()
        .find(|(known, _)| *known == ext.as_str())
        .map_or("application/octet-stream", |(_, mime)| *mime)
}
/// Ingest one audio file from the inbox.
///
/// Steps, in order: hash the file (BLAKE3) and skip it if the hash is already
/// in the database; extract tags; parse path hints relative to the inbox dir;
/// insert a pending record and mark it "processing"; look up similar
/// artists/albums; ask the LLM to normalize the metadata; then either
/// auto-approve (confidence >= configured threshold: move the file to storage
/// and finalize the track) or leave the record in "review".
///
/// Returns `Ok(false)` when the file hash is already known and `Ok(true)`
/// otherwise — including when normalization or the move failed, in which case
/// the pending record is set to "error". Errors from hashing, metadata
/// extraction and the database calls that use `?` are propagated.
async fn process_file(state: &Arc<AppState>, file_path: &std::path::Path) -> anyhow::Result<bool> {
let filename = file_path.file_name().and_then(|n| n.to_str()).unwrap_or("?");
tracing::info!(file = filename, "Processing new file: {:?}", file_path);
// Compute file hash for dedup
// The whole file is read into memory on a blocking thread; the size is the
// number of bytes read.
tracing::info!(file = filename, "Computing file hash...");
let path_clone = file_path.to_path_buf();
let (hash, file_size) = tokio::task::spawn_blocking(move || -> anyhow::Result<(String, i64)> {
let data = std::fs::read(&path_clone)?;
let hash = blake3::hash(&data).to_hex().to_string();
let size = data.len() as i64;
Ok((hash, size))
})
.await??;
tracing::info!(file = filename, hash = &hash[..16], size = file_size, "File hashed");
// Skip if already known
if db::file_hash_exists(&state.pool, &hash).await? {
tracing::info!(file = filename, "Skipping: file hash already exists in database");
return Ok(false);
}
// Extract raw metadata
// `metadata::extract` does synchronous I/O, hence spawn_blocking.
tracing::info!(file = filename, "Extracting metadata with Symphonia...");
let path_for_meta = file_path.to_path_buf();
let raw_meta = tokio::task::spawn_blocking(move || metadata::extract(&path_for_meta)).await??;
tracing::info!(
file = filename,
artist = raw_meta.artist.as_deref().unwrap_or("-"),
title = raw_meta.title.as_deref().unwrap_or("-"),
album = raw_meta.album.as_deref().unwrap_or("-"),
"Raw metadata extracted"
);
// Parse path hints relative to inbox dir
// Falls back to the full path when the file is not under the inbox dir.
let relative = file_path.strip_prefix(&state.config.inbox_dir).unwrap_or(file_path);
let hints = path_hints::parse(relative);
if hints.artist.is_some() || hints.album.is_some() || hints.year.is_some() {
tracing::info!(
file = filename,
path_artist = hints.artist.as_deref().unwrap_or("-"),
path_album = hints.album.as_deref().unwrap_or("-"),
path_year = ?hints.year,
"Path hints parsed"
);
}
let inbox_path_str = file_path.to_string_lossy().to_string();
// Insert pending record
tracing::info!(file = filename, "Inserting pending track record...");
let pending_id = db::insert_pending(
&state.pool,
&inbox_path_str,
&hash,
file_size,
&db::RawFields {
title: raw_meta.title.clone(),
artist: raw_meta.artist.clone(),
album: raw_meta.album.clone(),
year: raw_meta.year.map(|y| y as i32),
track_number: raw_meta.track_number.map(|t| t as i32),
genre: raw_meta.genre.clone(),
},
&db::PathHints {
title: hints.title.clone(),
artist: hints.artist.clone(),
album: hints.album.clone(),
year: hints.year,
track_number: hints.track_number,
},
raw_meta.duration_secs,
)
.await?;
db::update_pending_status(&state.pool, pending_id, "processing", None).await?;
// RAG: find similar entries in DB
// Tag values take precedence over path hints; an empty query skips the lookup.
let artist_query = raw_meta.artist.as_deref()
.or(hints.artist.as_deref())
.unwrap_or("");
let album_query = raw_meta.album.as_deref()
.or(hints.album.as_deref())
.unwrap_or("");
tracing::info!(file = filename, "Searching database for similar artists/albums...");
// Lookup failures are treated as "no similar entries" (unwrap_or_default).
let similar_artists = if !artist_query.is_empty() {
db::find_similar_artists(&state.pool, artist_query, 5).await.unwrap_or_default()
} else {
Vec::new()
};
let similar_albums = if !album_query.is_empty() {
db::find_similar_albums(&state.pool, album_query, 5).await.unwrap_or_default()
} else {
Vec::new()
};
if !similar_artists.is_empty() {
let names: Vec<&str> = similar_artists.iter().map(|a| a.name.as_str()).collect();
tracing::info!(file = filename, matches = ?names, "Found similar artists in DB");
}
if !similar_albums.is_empty() {
let names: Vec<&str> = similar_albums.iter().map(|a| a.name.as_str()).collect();
tracing::info!(file = filename, matches = ?names, "Found similar albums in DB");
}
// Call LLM for normalization
tracing::info!(file = filename, model = %state.config.ollama_model, "Sending to LLM for normalization...");
match normalize::normalize(state, &raw_meta, &hints, &similar_artists, &similar_albums).await {
Ok(normalized) => {
// A missing confidence counts as 0.0, which sends the track to review
// unless the threshold is 0 or lower.
let confidence = normalized.confidence.unwrap_or(0.0);
let status = if confidence >= state.config.confidence_threshold {
"approved"
} else {
"review"
};
tracing::info!(
file = filename,
norm_artist = normalized.artist.as_deref().unwrap_or("-"),
norm_title = normalized.title.as_deref().unwrap_or("-"),
norm_album = normalized.album.as_deref().unwrap_or("-"),
confidence,
status,
notes = normalized.notes.as_deref().unwrap_or("-"),
"LLM normalization complete"
);
if !normalized.featured_artists.is_empty() {
tracing::info!(
file = filename,
featured = ?normalized.featured_artists,
"Featured artists detected"
);
}
// The status is stored before the file is moved.
db::update_pending_normalized(&state.pool, pending_id, status, &normalized, None).await?;
// Auto-approve: move file to storage
if status == "approved" {
let artist = normalized.artist.as_deref().unwrap_or("Unknown Artist");
let album = normalized.album.as_deref().unwrap_or("Unknown Album");
let title = normalized.title.as_deref().unwrap_or("Unknown Title");
// Keep the source extension; "flac" is used when there is none.
let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("flac");
let track_num = normalized.track_number.unwrap_or(0);
// "NN - Title.ext" when a positive track number is known, else "Title.ext".
let dest_filename = if track_num > 0 {
format!("{:02} - {}.{}", track_num, sanitize_filename(title), ext)
} else {
format!("{}.{}", sanitize_filename(title), ext)
};
tracing::info!(
file = filename,
dest_artist = artist,
dest_album = album,
dest_filename = %dest_filename,
"Auto-approved, moving to storage..."
);
match mover::move_to_storage(
&state.config.storage_dir,
artist,
album,
&dest_filename,
file_path,
)
.await
{
Ok(storage_path) => {
let rel_path = storage_path.to_string_lossy().to_string();
match db::approve_and_finalize(&state.pool, pending_id, &rel_path).await {
Ok(track_id) => {
tracing::info!(file = filename, track_id, storage = %rel_path, "Track finalized in database");
}
Err(e) => {
// NOTE(review): the file has already been moved at this point and the
// pending record keeps the "approved" status; the failure is only logged.
tracing::error!(file = filename, ?e, "Failed to finalize track in DB after move");
}
}
}
Err(e) => {
tracing::error!(file = filename, ?e, "Failed to move file to storage");
db::update_pending_status(&state.pool, pending_id, "error", Some(&e.to_string())).await?;
}
}
} else {
tracing::info!(file = filename, confidence, "Sent to review queue (below threshold {})", state.config.confidence_threshold);
}
}
Err(e) => {
tracing::error!(file = filename, ?e, "LLM normalization failed");
db::update_pending_status(&state.pool, pending_id, "error", Some(&e.to_string())).await?;
}
}
// The file counts as processed even if normalization or the move failed.
Ok(true)
}
/// Process a cover image found in the inbox.
/// Uses path hints (Artist/Album/) to find the matching album in the DB,
/// then moves the image into the album's storage folder (falling back to
/// copy + remove when the rename fails) and records it in the database.
///
/// Returns `Ok(false)` when the image is skipped: its hash is already known,
/// it is not inside an album folder, no matching album is found, or the album
/// has no usable storage directory yet. Returns `Ok(true)` once the image
/// record has been inserted.
async fn process_cover_image(state: &Arc<AppState>, image_path: &std::path::Path) -> anyhow::Result<bool> {
let filename = image_path.file_name().and_then(|n| n.to_str()).unwrap_or("?");
// Hash for dedup
let path_clone = image_path.to_path_buf();
let (hash, file_size) = tokio::task::spawn_blocking(move || -> anyhow::Result<(String, i64)> {
let data = std::fs::read(&path_clone)?;
let hash = blake3::hash(&data).to_hex().to_string();
let size = data.len() as i64;
Ok((hash, size))
})
.await??;
if db::image_hash_exists(&state.pool, &hash).await? {
return Ok(false);
}
// Derive artist/album from path hints
// Path components that are not valid UTF-8 are dropped by the filter_map.
let relative = image_path.strip_prefix(&state.config.inbox_dir).unwrap_or(image_path);
let components: Vec<&str> = relative
.components()
.filter_map(|c| c.as_os_str().to_str())
.collect();
tracing::info!(file = filename, path = ?relative, components = components.len(), "Processing cover image");
// Supported structures:
// Artist/Album/image.jpg (3+ components)
// Album/image.jpg (2 components — album dir + image)
if components.len() < 2 {
tracing::info!(file = filename, "Cover image not inside an album folder, skipping");
return Ok(false);
}
// The directory directly containing the image is always the album hint
let album_raw = components[components.len() - 2];
// The artist hint is the directory one level above the album directory.
let path_artist = if components.len() >= 3 {
Some(components[components.len() - 3])
} else {
None
};
// Strip a year marker from the folder name; the year itself is not used here.
let (album_name, _) = path_hints::parse_album_year_public(album_raw);
tracing::info!(
file = filename,
path_artist = path_artist.unwrap_or("-"),
album_hint = %album_name,
"Looking up album in database..."
);
// Try to find album in DB — try with artist if available, then without
let album_id = if let Some(artist) = path_artist {
find_album_for_cover(&state.pool, artist, &album_name).await?
} else {
None
};
// If not found with artist, try fuzzy album name match across all artists
// Only the first result is considered, and only above 0.5 similarity.
let album_id = match album_id {
Some(id) => Some(id),
None => {
let similar_albums = db::find_similar_albums(&state.pool, &album_name, 3).await.unwrap_or_default();
if let Some(best) = similar_albums.first() {
if best.similarity > 0.5 {
tracing::info!(file = filename, album = %best.name, similarity = best.similarity, "Matched album by fuzzy search");
Some(best.id)
} else {
None
}
} else {
None
}
}
};
let album_id = match album_id {
Some(id) => id,
None => {
tracing::info!(
file = filename,
artist = path_artist.unwrap_or("-"),
album = %album_name,
"No matching album found in DB, skipping cover"
);
return Ok(false);
}
};
// Determine image type and move to storage
let image_type = classify_image(filename);
let mime = mime_for_image(filename);
// Get album's storage path from any track in that album
let storage_dir_opt: Option<(String,)> = sqlx::query_as(
"SELECT storage_path FROM tracks WHERE album_id = $1 LIMIT 1"
)
.bind(album_id)
.fetch_optional(&state.pool)
.await?;
let album_storage_dir = match storage_dir_opt {
Some((track_path,)) => {
let p = std::path::Path::new(&track_path);
// The album folder is the parent of the track file and must exist on disk.
// NOTE(review): `is_dir()` is a synchronous filesystem call inside an async fn.
match p.parent() {
Some(dir) if dir.is_dir() => dir.to_path_buf(),
_ => {
tracing::warn!(file = filename, track_path = %track_path, "Track storage path has no valid parent dir");
return Ok(false);
}
}
}
None => {
tracing::info!(file = filename, album_id, "Album has no tracks in storage yet, skipping cover");
return Ok(false);
}
};
tracing::info!(file = filename, dest_dir = ?album_storage_dir, "Will copy cover to album storage dir");
let dest = album_storage_dir.join(filename);
// NOTE(review): when `dest` already exists the inbox file is left in place,
// yet the image record below is still inserted with this file's hash.
if !dest.exists() {
// Move or copy image
// Any rename error (not only cross-device) triggers the copy + remove fallback.
match tokio::fs::rename(image_path, &dest).await {
Ok(()) => {}
Err(_) => {
tokio::fs::copy(image_path, &dest).await?;
tokio::fs::remove_file(image_path).await?;
}
}
}
let dest_str = dest.to_string_lossy().to_string();
db::insert_album_image(&state.pool, album_id, image_type, &dest_str, &hash, mime, file_size).await?;
tracing::info!(
file = filename,
album_id,
image_type,
dest = %dest_str,
"Album image saved"
);
Ok(true)
}
/// Find an album in DB matching the path-derived artist and album name.
///
/// Tries, in order:
/// 1. exact artist + exact album,
/// 2. for each similar artist (similarity >= 0.3): exact album under that
///    artist, then a fuzzy album match (similarity > 0.4) owned by that artist.
///
/// Returns the album id of the first match, or `None`. Failures of the fuzzy
/// lookups are treated as "no candidates"; failures of the exact lookups are
/// propagated.
async fn find_album_for_cover(pool: &sqlx::PgPool, path_artist: &str, album_name: &str) -> anyhow::Result<Option<i64>> {
    // Try exact match first
    if let Some(id) = db::find_album_id(pool, path_artist, album_name).await? {
        return Ok(Some(id));
    }

    // Try fuzzy artist, then exact or fuzzy album under that artist
    let similar_artists = db::find_similar_artists(pool, path_artist, 5).await.unwrap_or_default();

    // The fuzzy album query depends only on `album_name`, not on the artist,
    // so it is fetched lazily once and reused for every candidate artist
    // instead of being re-run on each iteration.
    let mut similar_albums = None;

    for artist in &similar_artists {
        if artist.similarity < 0.3 {
            continue;
        }

        // Exact album under fuzzy artist
        if let Some(id) = db::find_album_id(pool, &artist.name, album_name).await? {
            return Ok(Some(id));
        }

        // Fuzzy album under this artist
        if similar_albums.is_none() {
            similar_albums = Some(db::find_similar_albums(pool, album_name, 3).await.unwrap_or_default());
        }
        for album in similar_albums.iter().flatten() {
            if album.artist_id == artist.id && album.similarity > 0.4 {
                return Ok(Some(album.id));
            }
        }
    }

    Ok(None)
}
/// Replace characters that are unsafe in file names with `_`.
///
/// Surrounding whitespace is trimmed first. The characters
/// `/ \ : * ? " < > |` and all control characters (including NUL, tabs and
/// line breaks) are each replaced by an underscore. Control characters are
/// covered because a NUL byte makes the later filesystem call fail.
fn sanitize_filename(name: &str) -> String {
    name.trim()
        .chars()
        .map(|c| match c {
            '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' => '_',
            c if c.is_control() => '_',
            _ => c,
        })
        .collect()
}
+54
View File
@@ -0,0 +1,54 @@
use std::path::{Path, PathBuf};
/// Move a file from inbox to the permanent storage directory.
///
/// Creates the directory structure: `storage_dir/artist/album/filename`
/// (artist and album are sanitized for use as directory names).
/// Returns the full path of the moved file.
///
/// If `rename` fails (e.g. cross-device), falls back to copy + remove. When
/// the copy itself fails, the partially written destination is removed again
/// so that a later retry is not rejected as "already exists".
///
/// # Errors
/// Fails if the destination directory cannot be created, if the destination
/// file already exists, or if the file can be neither renamed nor copied.
pub async fn move_to_storage(
    storage_dir: &Path,
    artist: &str,
    album: &str,
    filename: &str,
    source: &Path,
) -> anyhow::Result<PathBuf> {
    let dest_dir = storage_dir
        .join(sanitize_dir_name(artist))
        .join(sanitize_dir_name(album));
    tokio::fs::create_dir_all(&dest_dir).await?;

    let dest = dest_dir.join(filename);

    // Avoid overwriting existing files
    if dest.exists() {
        anyhow::bail!("Destination already exists: {:?}", dest);
    }

    // Try atomic rename first (same filesystem)
    if tokio::fs::rename(source, &dest).await.is_err() {
        // Cross-device: copy then remove
        if let Err(copy_err) = tokio::fs::copy(source, &dest).await {
            // Best-effort cleanup of a partial copy. `dest` did not exist
            // before this call, so anything there now was created by it.
            let _ = tokio::fs::remove_file(&dest).await;
            return Err(copy_err.into());
        }
        tokio::fs::remove_file(source).await?;
    }

    tracing::info!(from = ?source, to = ?dest, "moved file to storage");
    Ok(dest)
}
/// Turn an arbitrary name into a safe, non-empty directory name.
///
/// The characters `/ \ : * ? " < > |` and NUL are replaced by `_`, then
/// whitespace and dots are trimmed from both ends together, so no trailing
/// space is left behind a removed dot. If nothing remains (e.g. `""`, `"."`,
/// `".."`), `"_"` is returned: joining an empty component onto a path would
/// silently drop that directory level.
fn sanitize_dir_name(name: &str) -> String {
    let replaced: String = name
        .chars()
        .map(|c| match c {
            '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' | '\0' => '_',
            _ => c,
        })
        .collect();

    let trimmed = replaced.trim_matches(|c: char| c == '.' || c.is_whitespace());
    if trimmed.is_empty() {
        "_".to_owned()
    } else {
        trimmed.to_owned()
    }
}
+216
View File
@@ -0,0 +1,216 @@
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use crate::db::{NormalizedFields, SimilarAlbum, SimilarArtist};
use crate::web::AppState;
use super::metadata::RawMetadata;
/// Normalize one track's metadata through the LLM.
///
/// Assembles the user message from the raw tags, the path hints and the
/// similar artists/albums, sends it to Ollama together with the system
/// prompt, and parses the reply into `NormalizedFields`.
pub async fn normalize(
    state: &Arc<AppState>,
    raw: &RawMetadata,
    hints: &crate::db::PathHints,
    similar_artists: &[SimilarArtist],
    similar_albums: &[SimilarAlbum],
) -> anyhow::Result<NormalizedFields> {
    let config = &state.config;
    let prompt = build_user_message(raw, hints, similar_artists, similar_albums);
    let reply = call_ollama(&config.ollama_url, &config.ollama_model, &state.system_prompt, &prompt).await?;
    parse_response(&reply)
}
/// Render the context sent to the LLM as a small markdown document.
///
/// Sections, in order: raw tag metadata, path hints, and — only when
/// non-empty — similar artists and similar albums already in the database.
/// Fields that are `None` are left out.
fn build_user_message(
    raw: &RawMetadata,
    hints: &crate::db::PathHints,
    similar_artists: &[SimilarArtist],
    similar_albums: &[SimilarAlbum],
) -> String {
    let mut msg = String::from("## Raw metadata from file tags\n");
    // `String::extend` over an `Option<String>` appends the line only when present.
    msg.extend(raw.title.as_ref().map(|v| format!("Title: \"{}\"\n", v)));
    msg.extend(raw.artist.as_ref().map(|v| format!("Artist: \"{}\"\n", v)));
    msg.extend(raw.album.as_ref().map(|v| format!("Album: \"{}\"\n", v)));
    msg.extend(raw.year.map(|v| format!("Year: {}\n", v)));
    msg.extend(raw.track_number.map(|v| format!("Track number: {}\n", v)));
    msg.extend(raw.genre.as_ref().map(|v| format!("Genre: \"{}\"\n", v)));

    msg.push_str("\n## Hints from file path\n");
    msg.extend(hints.artist.as_ref().map(|v| format!("Path artist: \"{}\"\n", v)));
    msg.extend(hints.album.as_ref().map(|v| format!("Path album: \"{}\"\n", v)));
    msg.extend(hints.year.map(|v| format!("Path year: {}\n", v)));
    msg.extend(hints.track_number.map(|v| format!("Path track number: {}\n", v)));
    msg.extend(hints.title.as_ref().map(|v| format!("Path title: \"{}\"\n", v)));

    if !similar_artists.is_empty() {
        msg.push_str("\n## Existing artists in database (similar matches)\n");
        msg.extend(
            similar_artists
                .iter()
                .map(|a| format!("- \"{}\" (similarity: {:.2})\n", a.name, a.similarity)),
        );
    }

    if !similar_albums.is_empty() {
        msg.push_str("\n## Existing albums in database (similar matches)\n");
        msg.extend(similar_albums.iter().map(|a| {
            let year_str = match a.year {
                Some(y) => format!(", year: {}", y),
                None => String::new(),
            };
            format!("- \"{}\" (similarity: {:.2}{})\n", a.name, a.similarity, year_str)
        }));
    }

    msg
}
/// JSON body sent to the Ollama `/api/chat` endpoint by `call_ollama`.
#[derive(Serialize)]
struct OllamaRequest {
/// Name of the model to run.
model: String,
/// Conversation messages; `call_ollama` sends a system and a user message.
messages: Vec<OllamaMessage>,
/// Requested output format; `call_ollama` sets this to "json".
format: String,
/// Streaming flag; `call_ollama` sets this to `false`.
stream: bool,
/// Sampling options.
options: OllamaOptions,
}
/// One chat message in an `OllamaRequest`.
#[derive(Serialize)]
struct OllamaMessage {
/// Message role; `call_ollama` uses "system" and "user".
role: String,
/// Message text.
content: String,
}
/// Sampling options of an `OllamaRequest`.
#[derive(Serialize)]
struct OllamaOptions {
/// Sampling temperature; `call_ollama` uses 0.1.
temperature: f64,
}
/// The part of the Ollama chat response that is read back.
#[derive(Deserialize)]
struct OllamaResponse {
/// The message returned by the model.
message: OllamaResponseMessage,
}
/// Message payload of an `OllamaResponse`.
#[derive(Deserialize)]
struct OllamaResponseMessage {
/// Raw text produced by the model (later parsed as JSON by `parse_response`).
content: String,
}
async fn call_ollama(
base_url: &str,
model: &str,
system_prompt: &str,
user_message: &str,
) -> anyhow::Result<String> {
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(120))
.build()?;
let request = OllamaRequest {
model: model.to_owned(),
messages: vec![
OllamaMessage {
role: "system".to_owned(),
content: system_prompt.to_owned(),
},
OllamaMessage {
role: "user".to_owned(),
content: user_message.to_owned(),
},
],
format: "json".to_owned(),
stream: false,
options: OllamaOptions { temperature: 0.1 },
};
let url = format!("{}/api/chat", base_url.trim_end_matches('/'));
tracing::info!(%url, model, prompt_len = user_message.len(), "Calling Ollama API...");
let start = std::time::Instant::now();
let resp = client.post(&url).json(&request).send().await?;
let elapsed = start.elapsed();
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::error!(%status, body = &body[..body.len().min(500)], "Ollama API error");
anyhow::bail!("Ollama returned {}: {}", status, body);
}
let ollama_resp: OllamaResponse = resp.json().await?;
tracing::info!(
elapsed_ms = elapsed.as_millis() as u64,
response_len = ollama_resp.message.content.len(),
"Ollama response received"
);
tracing::debug!(raw_response = %ollama_resp.message.content, "LLM raw output");
Ok(ollama_resp.message.content)
}
/// Parse the LLM JSON response into NormalizedFields.
///
/// Handles clean JSON, JSON wrapped in markdown code fences, and JSON
/// surrounded by other text: the span from the first `{` to the last `}` is
/// parsed. When no such span exists the trimmed response is parsed as is.
///
/// # Errors
/// Returns an error containing the raw response when the text is not valid
/// JSON for the expected fields.
fn parse_response(response: &str) -> anyhow::Result<NormalizedFields> {
    let cleaned = response.trim();

    // Extract the outermost JSON object. The `start < end` guard keeps the
    // slice valid even when a stray '}' appears before the first '{'.
    let json_str = match (cleaned.find('{'), cleaned.rfind('}')) {
        (Some(start), Some(end)) if start < end => &cleaned[start..=end],
        _ => cleaned,
    };

    /// Shape of the JSON object the model is expected to return.
    #[derive(Deserialize)]
    struct LlmOutput {
        artist: Option<String>,
        album: Option<String>,
        title: Option<String>,
        year: Option<i32>,
        track_number: Option<i32>,
        genre: Option<String>,
        #[serde(default)]
        featured_artists: Vec<String>,
        confidence: Option<f64>,
        notes: Option<String>,
    }

    let parsed: LlmOutput = serde_json::from_str(json_str)
        .map_err(|e| anyhow::anyhow!("Failed to parse LLM response as JSON: {} — raw: {}", e, response))?;

    Ok(NormalizedFields {
        title: parsed.title,
        artist: parsed.artist,
        album: parsed.album,
        year: parsed.year,
        track_number: parsed.track_number,
        genre: parsed.genre,
        featured_artists: parsed.featured_artists,
        confidence: parsed.confidence,
        notes: parsed.notes,
    })
}
+203
View File
@@ -0,0 +1,203 @@
use std::path::Path;
use crate::db::PathHints;
/// Derive metadata hints from a file path given relative to the inbox directory.
///
/// Recognized layouts:
///   Artist/Album/01 - Title.ext
///   Artist/Album (Year)/01 - Title.ext
///   Artist/(Year) Album/01 - Title.ext
///   Artist/Album [Year]/01 - Title.ext
///   Album/01 - Title.ext            (single directory: treated as the album)
///   01 - Title.ext                  (flat, no artist/album)
///
/// With three or more components the first is the artist and the second the
/// album. Path components that are not valid UTF-8 are ignored.
pub fn parse(relative_path: &Path) -> PathHints {
    let parts: Vec<&str> = relative_path
        .components()
        .filter_map(|c| c.as_os_str().to_str())
        .collect();

    let mut hints = PathHints::default();

    // Track number and title come from the stem of the last component.
    let stem = parts
        .last()
        .copied()
        .and_then(|name| Path::new(name).file_stem())
        .and_then(|s| s.to_str())
        .unwrap_or("");
    parse_filename(stem, &mut hints);

    // Pick the directory that names the album, recording the artist on the way.
    let album_dir = match parts.len() {
        // Just file.ext (or nothing at all)
        0 | 1 => None,
        // Album/file.ext (or Artist/file.ext — ambiguous, treat as album)
        2 => Some(parts[0]),
        // Artist/Album/.../file.ext
        _ => {
            hints.artist = Some(parts[0].to_owned());
            Some(parts[1])
        }
    };

    if let Some(dir) = album_dir {
        let (album, year) = parse_album_with_year(dir);
        hints.album = Some(album);
        if let Some(y) = year {
            hints.year = Some(y);
        }
    }

    hints
}
/// Fill `hints.track_number` and `hints.title` from a filename stem.
///
/// Patterns: "01 - Title", "01. Title", "1 Title", "Title".
/// When a numeric prefix is recognised and fits in an `i32`, it becomes the
/// track number and the remainder (if any) the title; otherwise the whole
/// trimmed stem is used as the title.
fn parse_filename(stem: &str, hints: &mut PathHints) {
    let name = stem.trim();

    // A usable prefix needs both a recognised shape and a parseable number.
    let numbered = try_strip_track_prefix(name)
        .and_then(|(digits, title)| digits.parse::<i32>().ok().map(|number| (number, title)));

    match numbered {
        Some((number, title)) => {
            hints.track_number = Some(number);
            if !title.is_empty() {
                hints.title = Some(title.to_owned());
            }
        }
        // No track number found, use full stem as title
        None if !name.is_empty() => hints.title = Some(name.to_owned()),
        None => {}
    }
}
/// Try to split a leading track number off a filename stem.
///
/// Accepts leading ASCII digits followed by a separator: `-` or `.` (with
/// optional surrounding whitespace), or plain whitespace. Examples:
/// "01 - Rest", "01. Rest", "01-Rest", "01.Rest", "1 Rest".
///
/// Returns `(number_str, rest)` on success. Returns `None` when the string
/// does not start with digits, consists of digits only, or the digits run
/// straight into other text without a separator (e.g. "2Pac - Changes"), so
/// that names which merely begin with a number are not split.
fn try_strip_track_prefix(s: &str) -> Option<(&str, &str)> {
    // Find leading digits
    let digit_end = s.find(|c: char| !c.is_ascii_digit())?;
    if digit_end == 0 {
        return None;
    }

    let (num_str, after_digits) = s.split_at(digit_end);
    let rest = after_digits.trim_start();

    let title = if let Some(stripped) = rest.strip_prefix('-').or_else(|| rest.strip_prefix('.')) {
        // "NN - Title", "NN-Title", "NN. Title", "NN.Title"
        stripped.trim()
    } else if rest.len() < after_digits.len() {
        // Just "01 Title" — digits followed by whitespace then text
        rest
    } else {
        // Digits glued to other text: not a track-number prefix.
        return None;
    };

    Some((num_str, title))
}
/// Public wrapper for cover image processing.
///
/// Forwards to `parse_album_with_year` unchanged: returns the album name taken
/// from a directory name together with the year found in it, if any.
pub fn parse_album_year_public(dir: &str) -> (String, Option<i32>) {
parse_album_with_year(dir)
}
/// Extract album name and optional year from directory name.
///
/// Patterns: "Album (2001)", "(2001) Album", "Album [2001]", "Album".
/// A bracketed number counts as a year only when it lies in 1900..=2100.
/// Text on both sides of the year marker is kept and joined with a single
/// space ("Album (2001) Deluxe" gives "Album Deluxe"). When no year is found
/// the directory name is returned unchanged with `None`.
fn parse_album_with_year(dir: &str) -> (String, Option<i32>) {
    let parse_year = |text: &str| {
        text.trim()
            .parse::<i32>()
            .ok()
            .filter(|year| (1900..=2100).contains(year))
    };

    // Try "Album (YYYY)" or "Album [YYYY]", using the last opening bracket.
    for (open, close) in [('(', ')'), ('[', ']')] {
        if let Some(start) = dir.rfind(open) {
            if let Some(end) = dir[start..].find(close) {
                if let Some(year) = parse_year(&dir[start + 1..start + end]) {
                    let before = dir[..start].trim();
                    let after = dir[start + end + 1..].trim();
                    // Separate the two halves with a space only when both
                    // exist, so the words around the marker are not fused.
                    let album = if before.is_empty() || after.is_empty() {
                        format!("{}{}", before, after)
                    } else {
                        format!("{} {}", before, after)
                    };
                    return (album, Some(year));
                }
            }
        }
    }

    // Try "(YYYY) Album" — reached when a later '(' group was not a year.
    if dir.starts_with('(') {
        if let Some(end) = dir.find(')') {
            if let Some(year) = parse_year(&dir[1..end]) {
                return (dir[end + 1..].trim().to_owned(), Some(year));
            }
        }
    }

    (dir.to_owned(), None)
}
// Unit tests for `parse`, covering the documented path layouts.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Artist/Album (Year)/NN - Title.ext: every hint is filled in.
#[test]
fn test_artist_album_track() {
let p = PathBuf::from("Pink Floyd/Wish You Were Here (1975)/03 - Have a Cigar.flac");
let h = parse(&p);
assert_eq!(h.artist.as_deref(), Some("Pink Floyd"));
assert_eq!(h.album.as_deref(), Some("Wish You Were Here"));
assert_eq!(h.year, Some(1975));
assert_eq!(h.track_number, Some(3));
assert_eq!(h.title.as_deref(), Some("Have a Cigar"));
}
// Artist/(Year) Album/NN. Title.ext: year before the album name, dot separator.
#[test]
fn test_year_prefix() {
let p = PathBuf::from("Artist/(2020) Album Name/01. Song.flac");
let h = parse(&p);
assert_eq!(h.artist.as_deref(), Some("Artist"));
assert_eq!(h.album.as_deref(), Some("Album Name"));
assert_eq!(h.year, Some(2020));
assert_eq!(h.track_number, Some(1));
assert_eq!(h.title.as_deref(), Some("Song"));
}
// A file directly in the inbox yields no artist or album hint.
#[test]
fn test_flat_file() {
let p = PathBuf::from("05 - Something.mp3");
let h = parse(&p);
assert_eq!(h.artist, None);
assert_eq!(h.album, None);
assert_eq!(h.track_number, Some(5));
assert_eq!(h.title.as_deref(), Some("Something"));
}
// Without a numeric prefix the whole stem becomes the title.
#[test]
fn test_no_track_number() {
let p = PathBuf::from("Artist/Album/Song Name.flac");
let h = parse(&p);
assert_eq!(h.track_number, None);
assert_eq!(h.title.as_deref(), Some("Song Name"));
}
// Album [Year]: square brackets are accepted as a year marker.
#[test]
fn test_square_bracket_year() {
let p = PathBuf::from("Band/Album [1999]/track.flac");
let h = parse(&p);
assert_eq!(h.album.as_deref(), Some("Album"));
assert_eq!(h.year, Some(1999));
}
}