use std::time::Duration; use serde_json::Value; use crate::scheduler::{Job, JobContext, JobLog}; const LASTFM_TAG_REQUEST_DELAY: Duration = Duration::from_millis(1200); const LASTFM_TAG_LIMIT: usize = 12; #[derive(Debug, Clone, Copy)] pub struct MetadataBackfillOptions { pub audio_bitrate: bool, pub audio_sample_rate: bool, pub audio_bit_depth: bool, pub duration_seconds: bool, pub local_genres: bool, pub lastfm_tags: bool, pub musicbrainz_tags: bool, pub overwrite: bool, } impl MetadataBackfillOptions { pub fn any_field(self) -> bool { self.audio_bitrate || self.audio_sample_rate || self.audio_bit_depth || self.duration_seconds || self.local_genres || self.lastfm_tags || self.musicbrainz_tags } fn needs_file_scan(self) -> bool { self.audio_bitrate || self.audio_sample_rate || self.audio_bit_depth || self.duration_seconds || self.local_genres } } #[derive(sqlx::FromRow)] struct BackfillRow { media_file_id: i64, file_path: String, audio_bitrate: Option, audio_sample_rate: Option, audio_bit_depth: Option, track_id: Option, duration_seconds: Option, } #[derive(sqlx::FromRow)] struct LastfmArtistTagRow { id: i64, name: String, } #[derive(sqlx::FromRow)] struct LastfmReleaseTagRow { id: i64, title: String, artist_name: Option, track_title: Option, } #[derive(sqlx::FromRow)] struct LastfmTrackTagRow { id: i64, title: String, artist_name: Option, } #[derive(Debug, Clone)] struct TagCandidate { name: String, weight: f64, } #[derive(Debug, Default)] struct LastfmTagStats { considered: u64, updated_entities: u64, tags_saved: u64, skipped_existing: u64, not_found: u64, failed: u64, } #[derive(Debug, Default)] struct MusicBrainzTagStats { considered: u64, updated_entities: u64, tags_saved: u64, skipped_existing: u64, not_found: u64, failed: u64, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum LastfmTagPassResult { Completed, RateLimited, } pub struct MetadataBackfillJob; #[async_trait::async_trait] impl Job for MetadataBackfillJob { fn name(&self) -> &'static str { "metadata_backfill" } fn description(&self) -> &'static str { "Backfill technical audio metadata, local genres, and Last.fm tags" } fn default_cron(&self) -> &'static str { "" } async fn run(&self, ctx: &JobContext, log: &mut JobLog) -> anyhow::Result<()> { run_with_options( ctx, log, MetadataBackfillOptions { audio_bitrate: true, audio_sample_rate: true, audio_bit_depth: true, duration_seconds: true, local_genres: true, lastfm_tags: true, musicbrainz_tags: true, overwrite: false, }, ) .await } } pub async fn run_with_options( ctx: &JobContext, log: &mut JobLog, options: MetadataBackfillOptions, ) -> anyhow::Result<()> { if !options.any_field() { log.warn("No metadata fields selected; nothing to backfill"); return Ok(()); } let mut scanned = 0u64; let mut media_updated = 0u64; let mut track_updated = 0u64; let mut local_tags_updated = 0u64; let mut unchanged = 0u64; let mut missing = 0u64; let mut failed = 0u64; log.info(&format!( "Metadata backfill options: file_scan={}, local_genres={}, lastfm_tags={}, musicbrainz_tags={}, mode={}", options.needs_file_scan(), options.local_genres, options.lastfm_tags, options.musicbrainz_tags, if options.overwrite { "overwrite" } else { "fill_missing" } )); if options.needs_file_scan() { let rows = sqlx::query_as::<_, BackfillRow>( "SELECT mf.id AS media_file_id, mf.file_path, \ mf.audio_bitrate, mf.audio_sample_rate, mf.audio_bit_depth, \ t.id AS track_id, t.duration_seconds \ FROM furumusic__media_file mf \ LEFT JOIN furumusic__track t ON t.audio_file_id = mf.id \ WHERE mf.file_type = 'audio' \ ORDER BY mf.id", ) .fetch_all(&ctx.pool) .await?; log.info(&format!( "Metadata file backfill started: {} audio file(s), mode={}", rows.len(), if options.overwrite { "overwrite" } else { "fill_missing" } )); for row in rows { scanned += 1; let path = crate::media_paths::resolve_media_file_path( &ctx.config.agent_storage_dir, &row.file_path, ); if !path.exists() { missing += 1; log.warn(&format!("missing file: {}", row.file_path)); continue; } let extract_path = path.clone(); let raw_meta = match tokio::task::spawn_blocking(move || { crate::agent::metadata::extract(&extract_path) }) .await { Ok(Ok(meta)) => meta, Ok(Err(e)) => { failed += 1; log.warn(&format!("metadata error for {}: {e}", path.display())); continue; } Err(e) => { failed += 1; log.warn(&format!("metadata task failed for {}: {e}", path.display())); continue; } }; let mut changed_media = false; let mut next_bitrate = row.audio_bitrate; let mut next_sample_rate = row.audio_sample_rate; let mut next_bit_depth = row.audio_bit_depth; if options.audio_bitrate && should_update(row.audio_bitrate, options.overwrite) { if let Some(value) = raw_meta.audio_bitrate { next_bitrate = Some(value); changed_media = next_bitrate != row.audio_bitrate || changed_media; } } if options.audio_sample_rate && should_update(row.audio_sample_rate, options.overwrite) { if let Some(value) = raw_meta.audio_sample_rate { next_sample_rate = Some(value); changed_media = next_sample_rate != row.audio_sample_rate || changed_media; } } if options.audio_bit_depth && should_update(row.audio_bit_depth, options.overwrite) { if let Some(value) = raw_meta.audio_bit_depth { next_bit_depth = Some(value); changed_media = next_bit_depth != row.audio_bit_depth || changed_media; } } let mut changed_track = false; let mut next_duration = row.duration_seconds; if options.duration_seconds && row.track_id.is_some() && should_update_duration(row.duration_seconds, options.overwrite) { if let Some(value) = raw_meta.duration_secs { next_duration = Some(value); changed_track = row .duration_seconds .map(|current| (current - value).abs() > 0.001) .unwrap_or(true); } } let mut changed_tags = false; if options.local_genres { if let (Some(track_id), Some(genre)) = (row.track_id, raw_meta.genre.as_deref()) { let saved = save_track_tag_text(&ctx.pool, track_id, genre, "file", options.overwrite) .await?; if saved > 0 { local_tags_updated += saved; changed_tags = true; } } } if changed_media { sqlx::query( "UPDATE furumusic__media_file \ SET audio_bitrate = $1, audio_sample_rate = $2, audio_bit_depth = $3 \ WHERE id = $4", ) .bind(next_bitrate) .bind(next_sample_rate) .bind(next_bit_depth) .bind(row.media_file_id) .execute(&ctx.pool) .await?; media_updated += 1; } if changed_track { if let (Some(track_id), Some(duration)) = (row.track_id, next_duration) { sqlx::query("UPDATE furumusic__track SET duration_seconds = $1 WHERE id = $2") .bind(duration) .bind(track_id) .execute(&ctx.pool) .await?; track_updated += 1; } } if !changed_media && !changed_track && !changed_tags { unchanged += 1; } if scanned % 100 == 0 { log.info(&format!( "Progress: {scanned} scanned, {media_updated} media updated, {track_updated} tracks updated, {local_tags_updated} local tags saved, {unchanged} unchanged, {missing} missing, {failed} failed" )); } } } let lastfm_stats = if options.lastfm_tags { log.info("Metadata file backfill finished; starting Last.fm tag backfill"); backfill_lastfm_tags(ctx, log, options.overwrite).await? } else { log.info("Last.fm tag backfill disabled for this run"); LastfmTagStats::default() }; let musicbrainz_stats = if options.musicbrainz_tags { log.info("Starting MusicBrainz tag backfill"); backfill_musicbrainz_tags(ctx, log, options.overwrite).await? } else { log.info("MusicBrainz tag backfill disabled for this run"); MusicBrainzTagStats::default() }; log.info(&format!( "Metadata backfill complete: {scanned} scanned, {media_updated} media updated, {track_updated} tracks updated, {local_tags_updated} local tags saved, {unchanged} unchanged, {missing} missing, {failed} failed; Last.fm tags: considered={}, updated_entities={}, tags_saved={}, skipped_existing={}, not_found={}, failed={}; MusicBrainz tags: considered={}, updated_entities={}, tags_saved={}, skipped_existing={}, not_found={}, failed={}", lastfm_stats.considered, lastfm_stats.updated_entities, lastfm_stats.tags_saved, lastfm_stats.skipped_existing, lastfm_stats.not_found, lastfm_stats.failed, musicbrainz_stats.considered, musicbrainz_stats.updated_entities, musicbrainz_stats.tags_saved, musicbrainz_stats.skipped_existing, musicbrainz_stats.not_found, musicbrainz_stats.failed, )); Ok(()) } pub async fn save_approved_track_genres( pool: &sqlx::PgPool, track_id: i64, genre_text: &str, ) -> anyhow::Result { save_track_tag_text(pool, track_id, genre_text, "review", false).await } async fn backfill_lastfm_tags( ctx: &JobContext, log: &mut JobLog, overwrite: bool, ) -> anyhow::Result { let api_key = ctx.config.lastfm_api_key.trim(); if api_key.is_empty() { log.warn("lastfm_api_key is not configured, skipping Last.fm tag backfill"); return Ok(LastfmTagStats::default()); } log.info("Last.fm tag backfill started"); let client = reqwest::Client::builder() .user_agent("furumusic-metadata-backfill/0.1") .timeout(Duration::from_secs(15)) .build()?; let mut stats = LastfmTagStats::default(); if backfill_lastfm_artist_tags(ctx, log, &client, api_key, overwrite, &mut stats).await? == LastfmTagPassResult::RateLimited { return Ok(stats); } if backfill_lastfm_release_tags(ctx, log, &client, api_key, overwrite, &mut stats).await? == LastfmTagPassResult::RateLimited { return Ok(stats); } if backfill_lastfm_track_tags(ctx, log, &client, api_key, overwrite, &mut stats).await? == LastfmTagPassResult::RateLimited { return Ok(stats); } Ok(stats) } async fn backfill_musicbrainz_tags( ctx: &JobContext, log: &mut JobLog, overwrite: bool, ) -> anyhow::Result { log.info("MusicBrainz tag backfill started"); let client = crate::jobs::musicbrainz::MusicBrainzClient::new("furumusic-metadata-backfill")?; let mut stats = MusicBrainzTagStats::default(); backfill_musicbrainz_artist_tags(ctx, log, &client, overwrite, &mut stats).await?; backfill_musicbrainz_release_tags(ctx, log, &client, overwrite, &mut stats).await?; Ok(stats) } async fn backfill_musicbrainz_artist_tags( ctx: &JobContext, log: &mut JobLog, client: &crate::jobs::musicbrainz::MusicBrainzClient, overwrite: bool, stats: &mut MusicBrainzTagStats, ) -> anyhow::Result<()> { let rows = sqlx::query_as::<_, LastfmArtistTagRow>( r#"SELECT DISTINCT a.id, a.name::text AS name FROM furumusic__artist a JOIN furumusic__track_artist ta ON ta.artist_id = a.id JOIN furumusic__track t ON t.id = ta.track_id WHERE a.is_hidden = false AND t.is_hidden = false ORDER BY a.id"#, ) .fetch_all(&ctx.pool) .await?; log.info(&format!( "MusicBrainz artist tag pass: checking {} artist(s)", rows.len() )); let total = rows.len(); for (index, row) in rows.into_iter().enumerate() { if should_log_lastfm_item(index + 1, total, 25) { log.info(&format!( "MusicBrainz artist tags {}/{}: artist {} \"{}\"", index + 1, total, row.id, row.name )); } if should_skip_source_entity(&ctx.pool, "artist", row.id, "musicbrainz", overwrite).await? { stats.skipped_existing += 1; continue; } stats.considered += 1; let mbid = match crate::jobs::musicbrainz::load_external_id(&ctx.pool, "artist", row.id, "artist") .await? { Some(mbid) => Some(mbid), None => match client.search_artist(&row.name).await { Ok(Some(found)) => { crate::jobs::musicbrainz::save_external_id( &ctx.pool, "artist", row.id, "artist", &found.mbid, found.score as f64 / 100.0, ) .await?; Some(found.mbid) } Ok(None) => None, Err(err) => { stats.failed += 1; log.warn(&format!( "MusicBrainz artist search failed for artist {} \"{}\": {err}", row.id, row.name )); None } }, }; let Some(mbid) = mbid else { stats.not_found += 1; continue; }; match client.lookup_artist_tags(&mbid).await { Ok(tags) if !tags.is_empty() => { let tags = musicbrainz_tags_to_candidates(&tags); match replace_entity_tags(&ctx.pool, "artist", row.id, &tags, "musicbrainz", false) .await { Ok(saved) => { stats.tags_saved += saved; stats.updated_entities += 1; } Err(err) => { stats.failed += 1; log.warn(&format!( "MusicBrainz artist tags save failed for artist {} \"{}\": {err}", row.id, row.name )); } } } Ok(_) => { stats.not_found += 1; } Err(err) => { stats.failed += 1; log.warn(&format!( "MusicBrainz artist tags failed for artist {} \"{}\" mbid={}: {err}", row.id, row.name, mbid )); } } } Ok(()) } async fn backfill_musicbrainz_release_tags( ctx: &JobContext, log: &mut JobLog, client: &crate::jobs::musicbrainz::MusicBrainzClient, overwrite: bool, stats: &mut MusicBrainzTagStats, ) -> anyhow::Result<()> { let rows = sqlx::query_as::<_, LastfmReleaseTagRow>( r#"SELECT r.id, r.title::text AS title, ( SELECT a.name::text FROM furumusic__release_artist ra JOIN furumusic__artist a ON a.id = ra.artist_id WHERE ra.release_id = r.id ORDER BY ra.position LIMIT 1 ) AS artist_name, ( SELECT t.title::text FROM furumusic__track t WHERE t.release_id = r.id AND t.is_hidden = false ORDER BY t.disc_number NULLS LAST, t.track_number NULLS LAST, t.id LIMIT 1 ) AS track_title FROM furumusic__release r WHERE r.is_hidden = false ORDER BY r.id"#, ) .fetch_all(&ctx.pool) .await?; log.info(&format!( "MusicBrainz release tag pass: checking {} release(s)", rows.len() )); let total = rows.len(); for (index, row) in rows.into_iter().enumerate() { if should_log_lastfm_item(index + 1, total, 25) { log.info(&format!( "MusicBrainz release tags {}/{}: release {} \"{}\"", index + 1, total, row.id, row.title )); } if should_skip_source_entity(&ctx.pool, "release", row.id, "musicbrainz", overwrite).await? { stats.skipped_existing += 1; continue; } let Some(artist) = row .artist_name .as_deref() .filter(|value| !value.trim().is_empty()) else { stats.not_found += 1; continue; }; stats.considered += 1; let mbid = match crate::jobs::musicbrainz::load_or_search_release_mbid( &ctx.pool, client, row.id, artist, &row.title, row.track_title.as_deref(), ) .await { Ok((release_mbid, _release_group_mbid)) => release_mbid, Err(err) => { stats.failed += 1; log.warn(&format!( "MusicBrainz release search failed for release {} \"{}\" / \"{}\": {err}", row.id, artist, row.title )); None } }; let Some(mbid) = mbid else { stats.not_found += 1; continue; }; match client.lookup_release_tags(&mbid).await { Ok(result) if !result.tags.is_empty() => { if let Some(group_mbid) = result.release_group_mbid.as_deref() { crate::jobs::musicbrainz::save_external_id( &ctx.pool, "release", row.id, "release_group", group_mbid, 1.0, ) .await?; } let tags = musicbrainz_tags_to_candidates(&result.tags); match replace_entity_tags(&ctx.pool, "release", row.id, &tags, "musicbrainz", false) .await { Ok(saved) => { stats.tags_saved += saved; stats.updated_entities += 1; } Err(err) => { stats.failed += 1; log.warn(&format!( "MusicBrainz release tags save failed for release {} \"{}\" / \"{}\": {err}", row.id, artist, row.title )); } } } Ok(_) => { stats.not_found += 1; } Err(err) => { stats.failed += 1; log.warn(&format!( "MusicBrainz release tags failed for release {} \"{}\" mbid={}: {err}", row.id, row.title, mbid )); } } } Ok(()) } async fn backfill_lastfm_artist_tags( ctx: &JobContext, log: &mut JobLog, client: &reqwest::Client, api_key: &str, overwrite: bool, stats: &mut LastfmTagStats, ) -> anyhow::Result { let rows = sqlx::query_as::<_, LastfmArtistTagRow>( r#"SELECT DISTINCT a.id, a.name::text AS name FROM furumusic__artist a JOIN furumusic__track_artist ta ON ta.artist_id = a.id JOIN furumusic__track t ON t.id = ta.track_id WHERE a.is_hidden = false AND t.is_hidden = false ORDER BY a.id"#, ) .fetch_all(&ctx.pool) .await?; log.info(&format!( "Last.fm artist tag pass: checking {} artist(s)", rows.len() )); let total = rows.len(); for (index, row) in rows.into_iter().enumerate() { if should_log_lastfm_item(index + 1, total, 25) { log.info(&format!( "Last.fm artist tags {}/{}: artist {} \"{}\"", index + 1, total, row.id, row.name )); } match should_skip_lastfm_entity(&ctx.pool, "artist", row.id, overwrite).await { Ok(true) => { stats.skipped_existing += 1; if should_log_lastfm_progress(index + 1, total, 25) { log.info(&format!( "Last.fm artist tags progress: {}/{}", index + 1, total )); } continue; } Ok(false) => {} Err(err) => { stats.failed += 1; log.warn(&format!( "Last.fm artist tags skip check failed for artist {} \"{}\": {err}", row.id, row.name )); continue; } } stats.considered += 1; match fetch_lastfm_artist_tags(client, api_key, &row.name).await { Ok(tags) if !tags.is_empty() => { match replace_entity_tags(&ctx.pool, "artist", row.id, &tags, "lastfm", false).await { Ok(saved) => { stats.tags_saved += saved; stats.updated_entities += 1; } Err(err) => { stats.failed += 1; log.warn(&format!( "Last.fm artist tags save failed for artist {} \"{}\": {err}", row.id, row.name )); } } } Ok(_) => { stats.not_found += 1; } Err(err) if is_lastfm_rate_limit_error(&err) => { stats.failed += 1; log.warn(&format!( "Last.fm rate limit reached while fetching artist tags for artist {} \"{}\"; stopping Last.fm tag backfill for this run", row.id, row.name )); return Ok(LastfmTagPassResult::RateLimited); } Err(err) => { stats.failed += 1; log.warn(&format!( "Last.fm artist tags failed for artist {} \"{}\": {err}", row.id, row.name )); } } if should_log_lastfm_progress(index + 1, total, 25) { log.info(&format!( "Last.fm artist tags progress: {}/{}", index + 1, total )); } tokio::time::sleep(LASTFM_TAG_REQUEST_DELAY).await; } Ok(LastfmTagPassResult::Completed) } async fn backfill_lastfm_release_tags( ctx: &JobContext, log: &mut JobLog, client: &reqwest::Client, api_key: &str, overwrite: bool, stats: &mut LastfmTagStats, ) -> anyhow::Result { let rows = sqlx::query_as::<_, LastfmReleaseTagRow>( r#"SELECT r.id, r.title::text AS title, ( SELECT a.name::text FROM furumusic__release_artist ra JOIN furumusic__artist a ON a.id = ra.artist_id WHERE ra.release_id = r.id ORDER BY ra.position LIMIT 1 ) AS artist_name, ( SELECT t.title::text FROM furumusic__track t WHERE t.release_id = r.id AND t.is_hidden = false ORDER BY t.disc_number NULLS LAST, t.track_number NULLS LAST, t.id LIMIT 1 ) AS track_title FROM furumusic__release r WHERE r.is_hidden = false ORDER BY r.id"#, ) .fetch_all(&ctx.pool) .await?; log.info(&format!( "Last.fm release tag pass: checking {} release(s)", rows.len() )); let total = rows.len(); for (index, row) in rows.into_iter().enumerate() { if should_log_lastfm_item(index + 1, total, 25) { log.info(&format!( "Last.fm release tags {}/{}: release {} \"{}\"", index + 1, total, row.id, row.title )); } match should_skip_lastfm_entity(&ctx.pool, "release", row.id, overwrite).await { Ok(true) => { stats.skipped_existing += 1; if should_log_lastfm_progress(index + 1, total, 25) { log.info(&format!( "Last.fm release tags progress: {}/{}", index + 1, total )); } continue; } Ok(false) => {} Err(err) => { stats.failed += 1; log.warn(&format!( "Last.fm release tags skip check failed for release {} \"{}\": {err}", row.id, row.title )); continue; } } let Some(artist) = row .artist_name .as_deref() .filter(|value| !value.trim().is_empty()) else { stats.not_found += 1; if should_log_lastfm_progress(index + 1, total, 25) { log.info(&format!( "Last.fm release tags progress: {}/{}", index + 1, total )); } continue; }; stats.considered += 1; match fetch_lastfm_album_tags(client, api_key, artist, &row.title).await { Ok(tags) if !tags.is_empty() => { match replace_entity_tags(&ctx.pool, "release", row.id, &tags, "lastfm", false) .await { Ok(saved) => { stats.tags_saved += saved; stats.updated_entities += 1; } Err(err) => { stats.failed += 1; log.warn(&format!( "Last.fm release tags save failed for release {} \"{}\" / \"{}\": {err}", row.id, artist, row.title )); } } } Ok(_) => { stats.not_found += 1; } Err(err) if is_lastfm_rate_limit_error(&err) => { stats.failed += 1; log.warn(&format!( "Last.fm rate limit reached while fetching release tags for release {} \"{}\" / \"{}\"; stopping Last.fm tag backfill for this run", row.id, artist, row.title )); return Ok(LastfmTagPassResult::RateLimited); } Err(err) => { stats.failed += 1; log.warn(&format!( "Last.fm release tags failed for release {} \"{}\" / \"{}\": {err}", row.id, artist, row.title )); } } if should_log_lastfm_progress(index + 1, total, 25) { log.info(&format!( "Last.fm release tags progress: {}/{}", index + 1, total )); } tokio::time::sleep(LASTFM_TAG_REQUEST_DELAY).await; } Ok(LastfmTagPassResult::Completed) } async fn backfill_lastfm_track_tags( ctx: &JobContext, log: &mut JobLog, client: &reqwest::Client, api_key: &str, overwrite: bool, stats: &mut LastfmTagStats, ) -> anyhow::Result { let rows = sqlx::query_as::<_, LastfmTrackTagRow>( r#"SELECT t.id, t.title::text AS title, ( SELECT a.name::text FROM furumusic__track_artist ta JOIN furumusic__artist a ON a.id = ta.artist_id WHERE ta.track_id = t.id AND ta.role <> 'featuring' ORDER BY ta.position LIMIT 1 ) AS artist_name FROM furumusic__track t WHERE t.is_hidden = false ORDER BY t.id"#, ) .fetch_all(&ctx.pool) .await?; log.info(&format!( "Last.fm track tag pass: checking {} track(s)", rows.len() )); let total = rows.len(); for (index, row) in rows.into_iter().enumerate() { if should_log_lastfm_item(index + 1, total, 50) { log.info(&format!( "Last.fm track tags {}/{}: track {} \"{}\"", index + 1, total, row.id, row.title )); } match should_skip_lastfm_entity(&ctx.pool, "track", row.id, overwrite).await { Ok(true) => { stats.skipped_existing += 1; if should_log_lastfm_progress(index + 1, total, 50) { log.info(&format!( "Last.fm track tags progress: {}/{}", index + 1, total )); } continue; } Ok(false) => {} Err(err) => { stats.failed += 1; log.warn(&format!( "Last.fm track tags skip check failed for track {} \"{}\": {err}", row.id, row.title )); continue; } } let Some(artist) = row .artist_name .as_deref() .filter(|value| !value.trim().is_empty()) else { stats.not_found += 1; if should_log_lastfm_progress(index + 1, total, 50) { log.info(&format!( "Last.fm track tags progress: {}/{}", index + 1, total )); } continue; }; stats.considered += 1; match fetch_lastfm_track_tags(client, api_key, artist, &row.title).await { Ok(tags) if !tags.is_empty() => { match replace_entity_tags(&ctx.pool, "track", row.id, &tags, "lastfm", true).await { Ok(saved) => { stats.tags_saved += saved; stats.updated_entities += 1; } Err(err) => { stats.failed += 1; log.warn(&format!( "Last.fm track tags save failed for track {} \"{}\" / \"{}\": {err}", row.id, artist, row.title )); } } } Ok(_) => { stats.not_found += 1; } Err(err) if is_lastfm_rate_limit_error(&err) => { stats.failed += 1; log.warn(&format!( "Last.fm rate limit reached while fetching track tags for track {} \"{}\" / \"{}\"; stopping Last.fm tag backfill for this run", row.id, artist, row.title )); return Ok(LastfmTagPassResult::RateLimited); } Err(err) => { stats.failed += 1; log.warn(&format!( "Last.fm track tags failed for track {} \"{}\" / \"{}\": {err}", row.id, artist, row.title )); } } if should_log_lastfm_progress(index + 1, total, 50) { log.info(&format!( "Last.fm track tags progress: {}/{}", index + 1, total )); } tokio::time::sleep(LASTFM_TAG_REQUEST_DELAY).await; } Ok(LastfmTagPassResult::Completed) } fn should_log_lastfm_progress(done: usize, total: usize, every: usize) -> bool { total > 0 && (done == total || done % every == 0) } fn should_log_lastfm_item(done: usize, total: usize, every: usize) -> bool { total > 0 && (done == 1 || done == total || done % every == 0) } fn is_lastfm_rate_limit_error(err: &anyhow::Error) -> bool { err.to_string().contains("Last.fm rate limit exceeded") } async fn should_skip_lastfm_entity( pool: &sqlx::PgPool, entity_kind: &str, entity_id: i64, overwrite: bool, ) -> anyhow::Result { should_skip_source_entity(pool, entity_kind, entity_id, "lastfm", overwrite).await } async fn should_skip_source_entity( pool: &sqlx::PgPool, entity_kind: &str, entity_id: i64, source: &str, overwrite: bool, ) -> anyhow::Result { if overwrite { return Ok(false); } let exists: Option = sqlx::query_scalar( r#"SELECT 1 FROM furumusic__entity_genre_tag WHERE entity_kind = $1 AND entity_id = $2 AND source = $3 LIMIT 1"#, ) .bind(entity_kind) .bind(entity_id) .bind(source) .fetch_optional(pool) .await?; Ok(exists.is_some()) } fn musicbrainz_tags_to_candidates( tags: &[crate::jobs::musicbrainz::MusicBrainzTag], ) -> Vec { tags.iter() .map(|tag| TagCandidate { name: tag.name.clone(), weight: tag.weight, }) .collect() } async fn fetch_lastfm_artist_tags( client: &reqwest::Client, api_key: &str, artist: &str, ) -> anyhow::Result> { fetch_lastfm_top_tags( client, &[ ("method", "artist.getTopTags"), ("api_key", api_key), ("artist", artist), ("autocorrect", "1"), ("format", "json"), ], ) .await } async fn fetch_lastfm_album_tags( client: &reqwest::Client, api_key: &str, artist: &str, album: &str, ) -> anyhow::Result> { fetch_lastfm_top_tags( client, &[ ("method", "album.getTopTags"), ("api_key", api_key), ("artist", artist), ("album", album), ("autocorrect", "1"), ("format", "json"), ], ) .await } async fn fetch_lastfm_track_tags( client: &reqwest::Client, api_key: &str, artist: &str, track: &str, ) -> anyhow::Result> { fetch_lastfm_top_tags( client, &[ ("method", "track.getTopTags"), ("api_key", api_key), ("artist", artist), ("track", track), ("autocorrect", "1"), ("format", "json"), ], ) .await } async fn fetch_lastfm_top_tags( client: &reqwest::Client, query: &[(&str, &str)], ) -> anyhow::Result> { let response = client .get("https://ws.audioscrobbler.com/2.0/") .query(query) .send() .await?; if response.status() == reqwest::StatusCode::NOT_FOUND { return Ok(Vec::new()); } let response = response.error_for_status()?; let body: Value = response.json().await?; if let Some(code) = body.get("error").and_then(|value| value.as_i64()) { if code == 29 { anyhow::bail!("Last.fm rate limit exceeded"); } if code == 6 || code == 7 { return Ok(Vec::new()); } anyhow::bail!( "Last.fm API error {code}: {}", body.get("message") .and_then(|value| value.as_str()) .unwrap_or("unknown error") ); } let Some(tag_value) = body.get("toptags").and_then(|value| value.get("tag")) else { return Ok(Vec::new()); }; let mut tags = match tag_value { Value::Array(values) => values.iter().filter_map(tag_from_value).collect::>(), Value::Object(_) => tag_from_value(tag_value).into_iter().collect::>(), _ => Vec::new(), }; tags.sort_by(|a, b| { b.weight .total_cmp(&a.weight) .then_with(|| a.name.cmp(&b.name)) }); tags.truncate(LASTFM_TAG_LIMIT); Ok(tags) } fn tag_from_value(value: &Value) -> Option { let name = value.get("name")?.as_str()?.trim(); let name = clean_tag_name(name)?; let weight = value .get("count") .and_then(lastfm_count_to_f64) .unwrap_or(1.0) .max(1.0); Some(TagCandidate { name, weight }) } fn lastfm_count_to_f64(value: &Value) -> Option { value .as_f64() .or_else(|| value.as_str().and_then(|text| text.parse::().ok())) } async fn save_track_tag_text( pool: &sqlx::PgPool, track_id: i64, tag_text: &str, source: &str, replace_source: bool, ) -> anyhow::Result { let tags = tags_from_text(tag_text); save_entity_tags(pool, "track", track_id, &tags, source, replace_source, true).await } async fn replace_entity_tags( pool: &sqlx::PgPool, entity_kind: &str, entity_id: i64, tags: &[TagCandidate], source: &str, mirror_track_genre: bool, ) -> anyhow::Result { save_entity_tags( pool, entity_kind, entity_id, tags, source, true, mirror_track_genre, ) .await } async fn save_entity_tags( pool: &sqlx::PgPool, entity_kind: &str, entity_id: i64, tags: &[TagCandidate], source: &str, replace_source: bool, mirror_track_genre: bool, ) -> anyhow::Result { if tags.is_empty() { return Ok(0); } if replace_source { sqlx::query( r#"DELETE FROM furumusic__entity_genre_tag WHERE entity_kind = $1 AND entity_id = $2 AND source = $3"#, ) .bind(entity_kind) .bind(entity_id) .bind(source) .execute(pool) .await?; if mirror_track_genre && entity_kind == "track" { sqlx::query( r#"DELETE FROM furumusic__track_genre tg WHERE tg.track_id = $1 AND NOT EXISTS ( SELECT 1 FROM furumusic__entity_genre_tag egt WHERE egt.entity_kind = 'track' AND egt.entity_id = tg.track_id AND egt.genre_id = tg.genre_id )"#, ) .bind(entity_id) .execute(pool) .await?; } } let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string(); let mut saved = 0u64; for tag in tags { let Some(genre_id) = ensure_genre(pool, &tag.name).await? else { continue; }; let result = sqlx::query( r#"INSERT INTO furumusic__entity_genre_tag (entity_kind, entity_id, genre_id, source, weight, updated_at) VALUES ($1, $2, $3, $4, $5, $6) ON CONFLICT (entity_kind, entity_id, genre_id, source) DO NOTHING"#, ) .bind(entity_kind) .bind(entity_id) .bind(genre_id) .bind(source) .bind(tag.weight) .bind(&now) .execute(pool) .await?; saved += result.rows_affected(); if mirror_track_genre && entity_kind == "track" { let result = sqlx::query( r#"INSERT INTO furumusic__track_genre (track_id, genre_id) VALUES ($1, $2) ON CONFLICT (track_id, genre_id) DO NOTHING"#, ) .bind(entity_id) .bind(genre_id) .execute(pool) .await?; saved += result.rows_affected(); } } Ok(saved) } async fn ensure_genre(pool: &sqlx::PgPool, name: &str) -> anyhow::Result> { let Some(name) = clean_tag_name(name) else { return Ok(None); }; let normalized = normalize_tag_name(&name); if normalized.is_empty() || is_ignored_tag(&normalized) { return Ok(None); } let existing: Option = sqlx::query_scalar( r#"SELECT id FROM furumusic__genre WHERE name_normalized = $1 ORDER BY id LIMIT 1"#, ) .bind(&normalized) .fetch_optional(pool) .await?; if existing.is_some() { return Ok(existing); } let id = sqlx::query_scalar::<_, i64>( r#"INSERT INTO furumusic__genre (name, name_normalized) VALUES ($1, $2) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id"#, ) .bind(&name) .bind(&normalized) .fetch_one(pool) .await?; Ok(Some(id)) } fn tags_from_text(value: &str) -> Vec { let normalized_separators = value.replace(" / ", ";").replace('|', ";"); let mut tags = Vec::new(); for raw in normalized_separators.split([';', ',']) { if let Some(name) = clean_tag_name(raw) { if !is_ignored_tag(&normalize_tag_name(&name)) && !tags.iter().any(|tag: &TagCandidate| { normalize_tag_name(&tag.name) == normalize_tag_name(&name) }) { tags.push(TagCandidate { name, weight: 1.0 }); } } } tags } fn clean_tag_name(value: &str) -> Option { let cleaned = value.trim().trim_matches('"').trim_matches('\'').trim(); if cleaned.is_empty() { return None; } let cleaned = cleaned.chars().take(100).collect::(); let cleaned = cleaned.split_whitespace().collect::>().join(" "); if cleaned.is_empty() { None } else { Some(cleaned) } } fn normalize_tag_name(value: &str) -> String { value .trim() .to_lowercase() .split_whitespace() .collect::>() .join(" ") } fn is_ignored_tag(normalized: &str) -> bool { matches!( normalized, "" | "unknown" | "undefined" | "none" | "n/a" | "na" | "other" | "misc" | "various" ) } fn should_update(current: Option, overwrite: bool) -> bool { overwrite || current.is_none() } fn should_update_duration(current: Option, overwrite: bool) -> bool { overwrite || current.unwrap_or(0.0) <= 0.0 }