CORE: reworked artwork_backfill.rs task

This commit is contained in:
Ultradesu
2026-05-27 18:07:02 +03:00
parent 59910bc34e
commit 476b300a6c
14 changed files with 1177 additions and 520 deletions
+3
View File
@@ -3,6 +3,7 @@
//! Sources (in priority order):
//! 1. Standalone image files in the album folder (cover.jpg, folder.jpg, etc.)
//! 2. Embedded cover art in audio file metadata (ID3 APIC, Vorbis METADATA_BLOCK_PICTURE, etc.)
//! 3. Remote metadata providers used by background backfill jobs.
//!
//! The first usable image found is saved as a MediaFile with file_type="cover_art"
//! and linked to the Release via cover_file_id.
@@ -26,6 +27,8 @@ pub enum CoverSource {
FolderFile(PathBuf),
/// Embedded in an audio file's metadata.
Embedded(PathBuf),
/// Downloaded from a remote metadata provider.
Remote(String),
}
/// Well-known cover art filenames, in priority order.
-60
View File
@@ -1,60 +0,0 @@
use crate::scheduler::{Job, JobContext, JobLog};
/// Periodic job that auto-assigns artist images from their release covers.
///
/// For every artist that has no `image_file_id`, picks the cover of the most
/// recent release (by year) that has one. Runs after the cover backfill job
/// so freshly-extracted covers are available.
pub struct ArtistImageBackfillJob;
#[async_trait::async_trait]
impl Job for ArtistImageBackfillJob {
fn name(&self) -> &'static str {
"artist_image_backfill"
}
fn description(&self) -> &'static str {
"Auto-assign artist images from release covers"
}
fn default_cron(&self) -> &'static str {
// 03:15 daily — after cover_backfill at 03:00
"0 15 3 * * *"
}
async fn run(&self, ctx: &JobContext, log: &mut JobLog) -> anyhow::Result<()> {
let result = sqlx::query(
"UPDATE furumusic__artist a \
SET image_file_id = ( \
SELECT r.cover_file_id \
FROM furumusic__release_artist ra \
JOIN furumusic__release r ON r.id = ra.release_id \
WHERE ra.artist_id = a.id \
AND r.cover_file_id IS NOT NULL \
ORDER BY r.year DESC NULLS LAST \
LIMIT 1 \
), \
updated_at = $1 \
WHERE a.image_file_id IS NULL \
AND EXISTS ( \
SELECT 1 FROM furumusic__release_artist ra2 \
JOIN furumusic__release r2 ON r2.id = ra2.release_id \
WHERE ra2.artist_id = a.id AND r2.cover_file_id IS NOT NULL \
)",
)
.bind(chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string())
.execute(&ctx.pool)
.await?;
let count = result.rows_affected();
if count > 0 {
log.info(&format!(
"Assigned images to {count} artists from release covers"
));
} else {
log.info("All artists already have images (or no covers available)");
}
Ok(())
}
}
-69
View File
@@ -1,69 +0,0 @@
use crate::scheduler::{Job, JobContext, JobLog};
/// Fallback job that assigns artist images from track cover art.
///
/// The primary `artist_image_backfill` job uses release covers. This job
/// runs afterwards and covers the case where the release itself has no
/// cover but individual tracks do (e.g. when cover art is embedded in the
/// audio file and extracted per-track rather than per-release).
///
/// For every artist that *still* has no `image_file_id` after the release-
/// based backfill, picks the `cover_file_id` of the most recent track
/// (by year, then track id) that has one.
pub struct ArtistTrackImageBackfillJob;
#[async_trait::async_trait]
impl Job for ArtistTrackImageBackfillJob {
fn name(&self) -> &'static str {
"artist_track_image_backfill"
}
fn description(&self) -> &'static str {
"Auto-assign artist images from track covers (fallback)"
}
fn default_cron(&self) -> &'static str {
// 03:30 daily — after artist_image_backfill at 03:15
"0 30 3 * * *"
}
async fn run(&self, ctx: &JobContext, log: &mut JobLog) -> anyhow::Result<()> {
let result = sqlx::query(
"UPDATE furumusic__artist a \
SET image_file_id = ( \
SELECT t.cover_file_id \
FROM furumusic__track_artist ta \
JOIN furumusic__track t ON t.id = ta.track_id \
WHERE ta.artist_id = a.id \
AND t.cover_file_id IS NOT NULL \
AND t.is_hidden = false \
ORDER BY t.year DESC NULLS LAST, t.id DESC \
LIMIT 1 \
), \
updated_at = $1 \
WHERE a.image_file_id IS NULL \
AND a.is_hidden = false \
AND EXISTS ( \
SELECT 1 FROM furumusic__track_artist ta2 \
JOIN furumusic__track t2 ON t2.id = ta2.track_id \
WHERE ta2.artist_id = a.id \
AND t2.cover_file_id IS NOT NULL \
AND t2.is_hidden = false \
)",
)
.bind(chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string())
.execute(&ctx.pool)
.await?;
let count = result.rows_affected();
if count > 0 {
log.info(&format!(
"Assigned images to {count} artists from track covers"
));
} else {
log.info("All artists already have images (or no track covers available)");
}
Ok(())
}
}
+963
View File
@@ -0,0 +1,963 @@
use std::path::{Path, PathBuf};
use reqwest::Client;
use serde::Deserialize;
use crate::agent::cover_art::{self, CoverImage, CoverSource};
use crate::agent::cover_variants;
use crate::scheduler::{Job, JobContext, JobLog};
pub struct ArtworkBackfillJob;
const LASTFM_REQUEST_DELAY: std::time::Duration = std::time::Duration::from_millis(1200);
const MAX_LASTFM_RELEASE_LOOKUPS: i64 = 200;
const MAX_LASTFM_ARTIST_LOOKUPS: i64 = 200;
#[derive(Debug, sqlx::FromRow)]
struct ReleaseCandidate {
id: i64,
title: String,
artist_name: Option<String>,
}
#[derive(Debug, sqlx::FromRow)]
struct ArtistCandidate {
id: i64,
name: String,
}
#[derive(Debug, Deserialize)]
struct LastfmAlbumResponse {
album: Option<LastfmImageContainer>,
error: Option<i32>,
message: Option<String>,
}
#[derive(Debug, Deserialize)]
struct LastfmArtistResponse {
artist: Option<LastfmImageContainer>,
error: Option<i32>,
message: Option<String>,
}
#[derive(Debug, Deserialize)]
struct LastfmTopAlbumsResponse {
topalbums: Option<LastfmTopAlbums>,
error: Option<i32>,
message: Option<String>,
}
#[derive(Debug, Deserialize)]
struct LastfmTopAlbums {
album: Option<OneOrMany<LastfmImageContainer>>,
}
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum OneOrMany<T> {
One(T),
Many(Vec<T>),
}
impl<T> OneOrMany<T> {
fn into_vec(self) -> Vec<T> {
match self {
Self::One(value) => vec![value],
Self::Many(values) => values,
}
}
}
#[derive(Debug, Deserialize)]
struct LastfmImageContainer {
image: Option<Vec<LastfmImage>>,
}
#[derive(Debug, Deserialize)]
struct LastfmImage {
#[serde(rename = "#text")]
url: String,
size: String,
}
#[derive(Default)]
struct ArtworkStats {
release_local_assigned: u64,
release_lastfm_assigned: u64,
release_lastfm_not_found: u64,
release_skipped_no_audio: u64,
artist_lastfm_assigned: u64,
artist_lastfm_not_found: u64,
variants_created: usize,
variants_unchanged: usize,
variants_missing_original: usize,
failed: u64,
}
#[async_trait::async_trait]
impl Job for ArtworkBackfillJob {
fn name(&self) -> &'static str {
"artwork_backfill"
}
fn description(&self) -> &'static str {
"Backfill and repair release, track, and artist artwork"
}
fn default_cron(&self) -> &'static str {
// Nightly, after inbox processing has had a chance to import new files.
"0 30 3 * * *"
}
async fn run(&self, ctx: &JobContext, log: &mut JobLog) -> anyhow::Result<()> {
let storage_dir = ctx.config.agent_storage_dir.trim();
if storage_dir.is_empty() {
log.warn("agent_storage_dir is not configured, skipping artwork backfill");
return Ok(());
}
let client = Client::builder()
.user_agent(format!(
"furumusic-artwork-backfill/{}",
env!("CARGO_PKG_VERSION")
))
.timeout(std::time::Duration::from_secs(20))
.build()?;
let mut stats = ArtworkStats::default();
backfill_release_local(ctx, log, storage_dir, &mut stats).await?;
let api_key = ctx.config.lastfm_api_key.trim();
if api_key.is_empty() {
log.warn("lastfm_api_key is not configured; skipping Last.fm artwork fallback");
} else {
backfill_release_lastfm(ctx, log, storage_dir, api_key, &client, &mut stats).await?;
backfill_artist_lastfm(ctx, log, storage_dir, api_key, &client, &mut stats).await?;
}
repair_cover_variants(ctx, log, storage_dir, &mut stats).await?;
log.info(&format!(
"Artwork backfill complete: release_local_assigned={}, release_lastfm_assigned={}, release_lastfm_not_found={}, release_skipped_no_audio={}, artist_lastfm_assigned={}, artist_lastfm_not_found={}, variants_created={}, variants_unchanged={}, variants_missing_original={}, failed={}",
stats.release_local_assigned,
stats.release_lastfm_assigned,
stats.release_lastfm_not_found,
stats.release_skipped_no_audio,
stats.artist_lastfm_assigned,
stats.artist_lastfm_not_found,
stats.variants_created,
stats.variants_unchanged,
stats.variants_missing_original,
stats.failed
));
Ok(())
}
}
async fn backfill_release_local(
ctx: &JobContext,
log: &mut JobLog,
storage_dir: &str,
stats: &mut ArtworkStats,
) -> anyhow::Result<()> {
let releases = sqlx::query_as::<_, ReleaseCandidate>(
r#"SELECT r.id,
r.title::text AS title,
(
SELECT a.name::text
FROM furumusic__release_artist ra
JOIN furumusic__artist a ON a.id = ra.artist_id
WHERE ra.release_id = r.id
ORDER BY ra.position
LIMIT 1
) AS artist_name
FROM furumusic__release r
WHERE r.cover_file_id IS NULL
AND r.is_hidden = false
ORDER BY r.id"#,
)
.fetch_all(&ctx.pool)
.await?;
if releases.is_empty() {
log.info("Release local artwork pass: all visible releases already have covers");
return Ok(());
}
log.info(&format!(
"Release local artwork pass: checking {} release(s) without covers",
releases.len()
));
for (index, release) in releases.iter().enumerate() {
log.info(&format!(
"Release local artwork {}/{}: release {} \"{}\"",
index + 1,
releases.len(),
release.id,
release.title
));
let audio_paths: Vec<String> = sqlx::query_scalar(
r#"SELECT mf.file_path::text
FROM furumusic__track t
JOIN furumusic__media_file mf ON mf.id = t.audio_file_id
WHERE t.release_id = $1
AND mf.file_type = 'audio'
ORDER BY t.disc_number NULLS LAST, t.track_number NULLS LAST, t.id"#,
)
.bind(release.id)
.fetch_all(&ctx.pool)
.await
.unwrap_or_default();
if audio_paths.is_empty() {
stats.release_skipped_no_audio += 1;
log.warn(&format!(
"Release {} \"{}\": no audio files found for local cover extraction",
release.id, release.title
));
continue;
}
let audio_files: Vec<PathBuf> = audio_paths
.iter()
.map(|path| resolve_media_path(storage_dir, path))
.collect();
let Some(folder) = audio_files.first().and_then(|path| path.parent()) else {
stats.failed += 1;
log.warn(&format!(
"Release {} \"{}\": could not determine audio folder",
release.id, release.title
));
continue;
};
let Some(cover) = cover_art::find_best_cover(folder, &audio_files).await else {
continue;
};
let source_desc = cover_source_description(&cover.source);
let artist_name = release.artist_name.as_deref().unwrap_or("Unknown Artist");
match cover_art::save_cover_to_storage(
&ctx.db,
&ctx.pool,
storage_dir,
artist_name,
&release.title,
&cover,
)
.await
{
Ok(cover_file_id) => {
cover_art::assign_cover_to_release(&ctx.pool, release.id, cover_file_id).await?;
stats.release_local_assigned += 1;
log.info(&format!(
"Release {} \"{}\": assigned local cover from {source_desc}",
release.id, release.title
));
}
Err(err) => {
stats.failed += 1;
log.warn(&format!(
"Release {} \"{}\": failed to save local cover: {err}",
release.id, release.title
));
}
}
}
Ok(())
}
async fn backfill_release_lastfm(
ctx: &JobContext,
log: &mut JobLog,
storage_dir: &str,
api_key: &str,
client: &Client,
stats: &mut ArtworkStats,
) -> anyhow::Result<()> {
let failed_cutoff = cutoff_iso(1);
let not_found_cutoff = cutoff_iso(30);
let releases = sqlx::query_as::<_, ReleaseCandidate>(
r#"SELECT r.id,
r.title::text AS title,
COALESCE(
(
SELECT a.name::text
FROM furumusic__release_artist ra
JOIN furumusic__artist a ON a.id = ra.artist_id
WHERE ra.release_id = r.id
ORDER BY ra.position
LIMIT 1
),
(
SELECT a.name::text
FROM furumusic__track t
JOIN furumusic__track_artist ta ON ta.track_id = t.id
JOIN furumusic__artist a ON a.id = ta.artist_id
WHERE t.release_id = r.id AND ta.role <> 'featuring'
ORDER BY t.disc_number NULLS LAST, t.track_number NULLS LAST, ta.position
LIMIT 1
)
) AS artist_name
FROM furumusic__release r
LEFT JOIN furumusic__artwork_lookup_state s
ON s.entity_kind = 'release'
AND s.entity_id = r.id
AND s.source = 'lastfm'
WHERE r.cover_file_id IS NULL
AND r.is_hidden = false
AND (
s.entity_id IS NULL
OR s.status = 'failed' AND s.last_attempt_at < $1
OR s.status = 'not_found' AND (s.attempt_count < 3 OR s.last_attempt_at < $2)
OR s.status = 'found' AND s.last_attempt_at < $1
)
ORDER BY s.last_attempt_at NULLS FIRST, r.id
LIMIT $3"#,
)
.bind(&failed_cutoff)
.bind(&not_found_cutoff)
.bind(MAX_LASTFM_RELEASE_LOOKUPS)
.fetch_all(&ctx.pool)
.await?;
if releases.is_empty() {
log.info("Release Last.fm artwork pass: no eligible releases need lookup");
return Ok(());
}
log.info(&format!(
"Release Last.fm artwork pass: looking up {} release(s)",
releases.len()
));
for (index, release) in releases.iter().enumerate() {
let Some(artist_name) = release
.artist_name
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
else {
stats.release_lastfm_not_found += 1;
record_lookup_state(
&ctx.pool,
"release",
release.id,
"not_found",
Some("release has no primary artist for Last.fm lookup"),
None,
)
.await?;
log.warn(&format!(
"Release {} \"{}\": no primary artist for Last.fm lookup",
release.id, release.title
));
continue;
};
log.info(&format!(
"Release Last.fm artwork {}/{}: release {} \"{}\" by \"{}\"",
index + 1,
releases.len(),
release.id,
release.title,
artist_name
));
match fetch_lastfm_album_image(client, api_key, artist_name, &release.title).await {
Ok(Some(image_url)) => match download_remote_cover(client, &image_url).await {
Ok(cover) => match cover_art::save_cover_to_storage(
&ctx.db,
&ctx.pool,
storage_dir,
artist_name,
&release.title,
&cover,
)
.await
{
Ok(cover_file_id) => {
cover_art::assign_cover_to_release(&ctx.pool, release.id, cover_file_id)
.await?;
record_lookup_state(
&ctx.pool,
"release",
release.id,
"found",
None,
Some(&image_url),
)
.await?;
stats.release_lastfm_assigned += 1;
log.info(&format!(
"Release {} \"{}\": assigned Last.fm cover",
release.id, release.title
));
}
Err(err) => {
stats.failed += 1;
record_lookup_state(
&ctx.pool,
"release",
release.id,
"failed",
Some(&err.to_string()),
Some(&image_url),
)
.await?;
log.warn(&format!(
"Release {} \"{}\": failed to save Last.fm cover: {err}",
release.id, release.title
));
}
},
Err(err) => {
stats.failed += 1;
record_lookup_state(
&ctx.pool,
"release",
release.id,
"failed",
Some(&err.to_string()),
Some(&image_url),
)
.await?;
log.warn(&format!(
"Release {} \"{}\": failed to download Last.fm cover: {err}",
release.id, release.title
));
}
},
Ok(None) => {
stats.release_lastfm_not_found += 1;
record_lookup_state(&ctx.pool, "release", release.id, "not_found", None, None)
.await?;
log.info(&format!(
"Release {} \"{}\": Last.fm did not return artwork",
release.id, release.title
));
}
Err(err) if err.to_string().contains("rate limit") => {
stats.failed += 1;
record_lookup_state(
&ctx.pool,
"release",
release.id,
"failed",
Some(&err.to_string()),
None,
)
.await?;
log.error(
"Last.fm rate limit exceeded during release artwork lookup; stopping this pass",
);
break;
}
Err(err) => {
stats.failed += 1;
record_lookup_state(
&ctx.pool,
"release",
release.id,
"failed",
Some(&err.to_string()),
None,
)
.await?;
log.warn(&format!(
"Release {} \"{}\": Last.fm artwork lookup failed: {err}",
release.id, release.title
));
}
}
tokio::time::sleep(LASTFM_REQUEST_DELAY).await;
}
Ok(())
}
async fn backfill_artist_lastfm(
ctx: &JobContext,
log: &mut JobLog,
storage_dir: &str,
api_key: &str,
client: &Client,
stats: &mut ArtworkStats,
) -> anyhow::Result<()> {
let failed_cutoff = cutoff_iso(1);
let not_found_cutoff = cutoff_iso(30);
let artists = sqlx::query_as::<_, ArtistCandidate>(
r#"SELECT a.id, a.name::text AS name
FROM furumusic__artist a
LEFT JOIN furumusic__artwork_lookup_state s
ON s.entity_kind = 'artist'
AND s.entity_id = a.id
AND s.source = 'lastfm'
WHERE a.image_file_id IS NULL
AND a.is_hidden = false
AND (
s.entity_id IS NULL
OR s.status = 'failed' AND s.last_attempt_at < $1
OR s.status = 'not_found' AND (s.attempt_count < 3 OR s.last_attempt_at < $2)
OR s.status = 'found' AND s.last_attempt_at < $1
)
ORDER BY s.last_attempt_at NULLS FIRST, a.id
LIMIT $3"#,
)
.bind(&failed_cutoff)
.bind(&not_found_cutoff)
.bind(MAX_LASTFM_ARTIST_LOOKUPS)
.fetch_all(&ctx.pool)
.await?;
if artists.is_empty() {
log.info("Artist Last.fm artwork pass: no eligible artists need lookup");
return Ok(());
}
log.info(&format!(
"Artist Last.fm artwork pass: looking up {} artist(s)",
artists.len()
));
for (index, artist) in artists.iter().enumerate() {
log.info(&format!(
"Artist Last.fm artwork {}/{}: artist {} \"{}\"",
index + 1,
artists.len(),
artist.id,
artist.name
));
match fetch_lastfm_artist_image(client, api_key, &artist.name).await {
Ok(Some(image_url)) => match download_remote_cover(client, &image_url).await {
Ok(cover) => match cover_art::save_cover_to_storage(
&ctx.db,
&ctx.pool,
storage_dir,
&artist.name,
"__artist_image__",
&cover,
)
.await
{
Ok(image_file_id) => {
sqlx::query(
r#"UPDATE furumusic__artist
SET image_file_id = $1,
updated_at = $3
WHERE id = $2
AND image_file_id IS NULL"#,
)
.bind(image_file_id)
.bind(artist.id)
.bind(now_iso())
.execute(&ctx.pool)
.await?;
record_lookup_state(
&ctx.pool,
"artist",
artist.id,
"found",
None,
Some(&image_url),
)
.await?;
stats.artist_lastfm_assigned += 1;
log.info(&format!(
"Artist {} \"{}\": assigned Last.fm image",
artist.id, artist.name
));
}
Err(err) => {
stats.failed += 1;
record_lookup_state(
&ctx.pool,
"artist",
artist.id,
"failed",
Some(&err.to_string()),
Some(&image_url),
)
.await?;
log.warn(&format!(
"Artist {} \"{}\": failed to save Last.fm image: {err}",
artist.id, artist.name
));
}
},
Err(err) => {
stats.failed += 1;
record_lookup_state(
&ctx.pool,
"artist",
artist.id,
"failed",
Some(&err.to_string()),
Some(&image_url),
)
.await?;
log.warn(&format!(
"Artist {} \"{}\": failed to download Last.fm image: {err}",
artist.id, artist.name
));
}
},
Ok(None) => {
stats.artist_lastfm_not_found += 1;
record_lookup_state(&ctx.pool, "artist", artist.id, "not_found", None, None)
.await?;
log.info(&format!(
"Artist {} \"{}\": Last.fm did not return artwork",
artist.id, artist.name
));
}
Err(err) if err.to_string().contains("rate limit") => {
stats.failed += 1;
record_lookup_state(
&ctx.pool,
"artist",
artist.id,
"failed",
Some(&err.to_string()),
None,
)
.await?;
log.error(
"Last.fm rate limit exceeded during artist artwork lookup; stopping this pass",
);
break;
}
Err(err) => {
stats.failed += 1;
record_lookup_state(
&ctx.pool,
"artist",
artist.id,
"failed",
Some(&err.to_string()),
None,
)
.await?;
log.warn(&format!(
"Artist {} \"{}\": Last.fm artwork lookup failed: {err}",
artist.id, artist.name
));
}
}
tokio::time::sleep(LASTFM_REQUEST_DELAY).await;
}
Ok(())
}
async fn repair_cover_variants(
ctx: &JobContext,
log: &mut JobLog,
storage_dir: &str,
stats: &mut ArtworkStats,
) -> anyhow::Result<()> {
let rows: Vec<(i64, String)> = sqlx::query_as(
"SELECT id, file_path FROM furumusic__media_file WHERE file_type = 'cover_art' ORDER BY id",
)
.fetch_all(&ctx.pool)
.await?;
if rows.is_empty() {
log.info("Cover variant pass: no cover art media files found");
return Ok(());
}
log.info(&format!(
"Cover variant pass: checking {} cover art media file(s)",
rows.len()
));
for (media_file_id, file_path) in rows {
let path = resolve_media_path(storage_dir, &file_path);
if !path.exists() {
stats.variants_missing_original += 1;
log.warn(&format!(
"Media file {media_file_id}: original cover not found at {}",
path.display()
));
continue;
}
match cover_variants::ensure_cover_variants(&path).await {
Ok(0) => stats.variants_unchanged += 1,
Ok(count) => {
stats.variants_created += count;
log.info(&format!(
"Media file {media_file_id}: created {count} variant(s)"
));
}
Err(err) => {
stats.failed += 1;
log.warn(&format!(
"Media file {media_file_id}: failed to create variants: {err}"
));
}
}
}
Ok(())
}
async fn fetch_lastfm_album_image(
client: &Client,
api_key: &str,
artist: &str,
album: &str,
) -> anyhow::Result<Option<String>> {
let response = client
.get("https://ws.audioscrobbler.com/2.0/")
.query(&[
("method", "album.getInfo"),
("api_key", api_key),
("artist", artist),
("album", album),
("autocorrect", "1"),
("format", "json"),
])
.send()
.await?;
let body = response.text().await?;
let parsed: LastfmAlbumResponse = serde_json::from_str(&body)?;
if let Some(code) = parsed.error {
if code == 6 || code == 7 {
return Ok(None);
}
if code == 29 {
anyhow::bail!("Last.fm rate limit exceeded");
}
anyhow::bail!(
"Last.fm API error {code}: {}",
parsed.message.unwrap_or_default()
);
}
Ok(parsed
.album
.and_then(|album| choose_best_image(album.image)))
}
async fn fetch_lastfm_artist_image(
client: &Client,
api_key: &str,
artist: &str,
) -> anyhow::Result<Option<String>> {
let response = client
.get("https://ws.audioscrobbler.com/2.0/")
.query(&[
("method", "artist.getInfo"),
("api_key", api_key),
("artist", artist),
("autocorrect", "1"),
("format", "json"),
])
.send()
.await?;
let body = response.text().await?;
let parsed: LastfmArtistResponse = serde_json::from_str(&body)?;
if let Some(code) = parsed.error {
if code == 6 || code == 7 {
return Ok(None);
}
if code == 29 {
anyhow::bail!("Last.fm rate limit exceeded");
}
anyhow::bail!(
"Last.fm API error {code}: {}",
parsed.message.unwrap_or_default()
);
}
if let Some(url) = parsed
.artist
.and_then(|artist| choose_best_image(artist.image))
{
return Ok(Some(url));
}
fetch_lastfm_artist_top_album_image(client, api_key, artist).await
}
async fn fetch_lastfm_artist_top_album_image(
client: &Client,
api_key: &str,
artist: &str,
) -> anyhow::Result<Option<String>> {
let response = client
.get("https://ws.audioscrobbler.com/2.0/")
.query(&[
("method", "artist.getTopAlbums"),
("api_key", api_key),
("artist", artist),
("autocorrect", "1"),
("limit", "10"),
("format", "json"),
])
.send()
.await?;
let body = response.text().await?;
let parsed: LastfmTopAlbumsResponse = serde_json::from_str(&body)?;
if let Some(code) = parsed.error {
if code == 6 || code == 7 {
return Ok(None);
}
if code == 29 {
anyhow::bail!("Last.fm rate limit exceeded");
}
anyhow::bail!(
"Last.fm API error {code}: {}",
parsed.message.unwrap_or_default()
);
}
let albums = parsed
.topalbums
.and_then(|topalbums| topalbums.album)
.map(OneOrMany::into_vec)
.unwrap_or_default();
Ok(albums
.into_iter()
.filter_map(|album| choose_best_image(album.image))
.next())
}
fn choose_best_image(images: Option<Vec<LastfmImage>>) -> Option<String> {
let mut images = images.unwrap_or_default();
images.sort_by_key(|image| image_size_rank(&image.size));
images
.into_iter()
.rev()
.map(|image| image.url.trim().to_string())
.find(|url| is_usable_lastfm_image(url))
}
fn image_size_rank(size: &str) -> u8 {
match size {
"mega" => 5,
"extralarge" => 4,
"large" => 3,
"medium" => 2,
"small" => 1,
_ => 0,
}
}
fn is_usable_lastfm_image(url: &str) -> bool {
let value = url.trim();
!value.is_empty()
&& !value.contains("2a96cbd8b46e442fc41c2b86b821562f")
&& !value.contains("default_")
}
async fn download_remote_cover(client: &Client, url: &str) -> anyhow::Result<CoverImage> {
let response = client.get(url).send().await?;
if !response.status().is_success() {
anyhow::bail!("image download failed with HTTP {}", response.status());
}
let header_mime = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.and_then(|value| value.to_str().ok())
.and_then(normalize_image_mime);
let data = response.bytes().await?.to_vec();
if data.is_empty() {
anyhow::bail!("downloaded image is empty");
}
let mime_type = header_mime
.or_else(|| guess_image_mime(&data))
.ok_or_else(|| anyhow::anyhow!("downloaded file is not a supported image"))?;
Ok(CoverImage {
data,
mime_type,
source: CoverSource::Remote(url.to_string()),
})
}
fn normalize_image_mime(value: &str) -> Option<String> {
let mime = value.split(';').next()?.trim().to_ascii_lowercase();
match mime.as_str() {
"image/jpeg" | "image/jpg" => Some("image/jpeg".to_string()),
"image/png" => Some("image/png".to_string()),
"image/webp" => Some("image/webp".to_string()),
"image/gif" => Some("image/gif".to_string()),
"image/bmp" => Some("image/bmp".to_string()),
_ => None,
}
}
fn guess_image_mime(data: &[u8]) -> Option<String> {
if data.starts_with(&[0xFF, 0xD8, 0xFF]) {
Some("image/jpeg".to_string())
} else if data.starts_with(&[0x89, 0x50, 0x4E, 0x47]) {
Some("image/png".to_string())
} else if data.starts_with(b"RIFF") && data.len() > 12 && &data[8..12] == b"WEBP" {
Some("image/webp".to_string())
} else if data.starts_with(b"GIF8") {
Some("image/gif".to_string())
} else if data.starts_with(&[0x42, 0x4D]) {
Some("image/bmp".to_string())
} else {
None
}
}
async fn record_lookup_state(
pool: &sqlx::PgPool,
entity_kind: &str,
entity_id: i64,
status: &str,
error: Option<&str>,
source_url: Option<&str>,
) -> anyhow::Result<()> {
sqlx::query(
r#"INSERT INTO furumusic__artwork_lookup_state
(entity_kind, entity_id, source, status, attempt_count, last_attempt_at, last_error, source_url)
VALUES ($1, $2, 'lastfm', $3, 1, $4, $5, $6)
ON CONFLICT (entity_kind, entity_id, source) DO UPDATE SET
status = EXCLUDED.status,
attempt_count = furumusic__artwork_lookup_state.attempt_count + 1,
last_attempt_at = EXCLUDED.last_attempt_at,
last_error = EXCLUDED.last_error,
source_url = EXCLUDED.source_url"#,
)
.bind(entity_kind)
.bind(entity_id)
.bind(status)
.bind(now_iso())
.bind(error)
.bind(source_url)
.execute(pool)
.await?;
Ok(())
}
fn cover_source_description(source: &CoverSource) -> String {
match source {
CoverSource::FolderFile(path) => format!("folder: {}", path.display()),
CoverSource::Embedded(path) => format!("embedded: {}", path.display()),
CoverSource::Remote(url) => format!("remote: {url}"),
}
}
fn resolve_media_path(storage_dir: &str, file_path: &str) -> PathBuf {
let path = PathBuf::from(file_path);
if path.is_absolute() {
path
} else {
Path::new(storage_dir).join(path)
}
}
fn cutoff_iso(days: i64) -> String {
(chrono::Utc::now() - chrono::Duration::days(days))
.format("%Y-%m-%dT%H:%M:%SZ")
.to_string()
}
fn now_iso() -> String {
chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string()
}
-167
View File
@@ -1,167 +0,0 @@
use std::path::{Path, PathBuf};
use crate::agent::cover_art;
use crate::scheduler::{Job, JobContext, JobLog};
/// One-shot / periodic job that finds releases without cover art and attempts
/// to extract or discover covers from their audio files in storage.
pub struct CoverBackfillJob;
#[async_trait::async_trait]
impl Job for CoverBackfillJob {
fn name(&self) -> &'static str {
"cover_backfill"
}
fn description(&self) -> &'static str {
"Backfill cover art for releases missing covers"
}
fn default_cron(&self) -> &'static str {
// Once a day at 03:00
"0 0 3 * * *"
}
async fn run(&self, ctx: &JobContext, log: &mut JobLog) -> anyhow::Result<()> {
let storage_dir = &ctx.config.agent_storage_dir;
if storage_dir.is_empty() {
log.warn("agent_storage_dir is not configured, skipping cover backfill");
return Ok(());
}
// Find all releases without a cover
let rows: Vec<(i64, String)> = sqlx::query_as(
"SELECT r.id, r.title \
FROM furumusic__release r \
WHERE r.cover_file_id IS NULL \
ORDER BY r.id",
)
.fetch_all(&ctx.pool)
.await?;
if rows.is_empty() {
log.info("All releases already have cover art, nothing to backfill");
return Ok(());
}
log.info(&format!(
"Found {} releases without cover art, starting backfill...",
rows.len()
));
let mut assigned = 0u32;
let mut failed = 0u32;
let mut skipped_no_audio = 0u32;
let mut skipped_no_cover = 0u32;
let total = rows.len();
for (i, (release_id, release_title)) in rows.iter().enumerate() {
log.info(&format!(
"[{}/{}] Processing release {release_id} \"{release_title}\"...",
i + 1,
total,
));
// Find audio files belonging to this release via tracks → media_file
let audio_paths: Vec<(String,)> = sqlx::query_as(
"SELECT mf.file_path \
FROM furumusic__track t \
JOIN furumusic__media_file mf ON mf.id = t.audio_file_id \
WHERE t.release_id = $1 AND mf.file_type = 'audio'",
)
.bind(release_id)
.fetch_all(&ctx.pool)
.await
.unwrap_or_default();
if audio_paths.is_empty() {
log.warn(&format!(
"Release {release_id} \"{release_title}\": no audio files found, skipping"
));
skipped_no_audio += 1;
continue;
}
// Determine the folder from the first audio file's path
let first_path = Path::new(&audio_paths[0].0);
let folder = first_path.parent().unwrap_or(Path::new("."));
// Collect all audio file paths as PathBuf
let audio_files: Vec<PathBuf> =
audio_paths.iter().map(|(p,)| PathBuf::from(p)).collect();
// Try to find cover art
let cover = match cover_art::find_best_cover(folder, &audio_files).await {
Some(c) => c,
None => {
log.info(&format!(
"Release {release_id} \"{release_title}\": no cover image found in {} audio files, skipping",
audio_files.len(),
));
skipped_no_cover += 1;
continue;
}
};
let source_desc = match &cover.source {
cover_art::CoverSource::FolderFile(p) => format!("folder: {}", p.display()),
cover_art::CoverSource::Embedded(p) => format!("embedded: {}", p.display()),
};
// Look up artist name for storage path
let artist_name: String = sqlx::query_scalar(
"SELECT a.name FROM furumusic__artist a \
JOIN furumusic__release_artist ra ON ra.artist_id = a.id \
WHERE ra.release_id = $1 \
ORDER BY ra.position LIMIT 1",
)
.bind(release_id)
.fetch_optional(&ctx.pool)
.await
.ok()
.flatten()
.unwrap_or_else(|| "Unknown Artist".to_string());
match cover_art::save_cover_to_storage(
&ctx.db,
&ctx.pool,
storage_dir,
&artist_name,
release_title,
&cover,
)
.await
{
Ok(cover_file_id) => {
if let Err(e) =
cover_art::assign_cover_to_release(&ctx.pool, *release_id, cover_file_id)
.await
{
log.warn(&format!(
"Release {release_id} \"{release_title}\": saved cover but failed to assign: {e}"
));
failed += 1;
} else {
log.info(&format!(
"Release {release_id} \"{release_title}\": assigned cover from {source_desc}"
));
assigned += 1;
}
}
Err(e) => {
log.warn(&format!(
"Release {release_id} \"{release_title}\": failed to save cover: {e}"
));
failed += 1;
}
}
}
log.info(&format!(
"Cover backfill complete: {assigned} assigned, {failed} failed, \
{skipped_no_audio} skipped (no audio), {skipped_no_cover} skipped (no cover found)"
));
Ok(())
}
}
-96
View File
@@ -1,96 +0,0 @@
use std::path::{Path, PathBuf};
use crate::agent::cover_variants;
use crate::scheduler::{Job, JobContext, JobLog};
pub struct CoverVariantBackfillJob;
#[async_trait::async_trait]
impl Job for CoverVariantBackfillJob {
fn name(&self) -> &'static str {
"cover_variant_backfill"
}
fn description(&self) -> &'static str {
"Generate missing resized cover image variants"
}
fn default_cron(&self) -> &'static str {
// Once a day after cover extraction and artist image assignment.
"0 45 3 * * *"
}
async fn run(&self, ctx: &JobContext, log: &mut JobLog) -> anyhow::Result<()> {
let storage_dir = &ctx.config.agent_storage_dir;
if storage_dir.is_empty() {
log.warn("agent_storage_dir is not configured, skipping cover variant backfill");
return Ok(());
}
let rows: Vec<(i64, String)> = sqlx::query_as(
"SELECT id, file_path FROM furumusic__media_file WHERE file_type = 'cover_art' ORDER BY id",
)
.fetch_all(&ctx.pool)
.await?;
if rows.is_empty() {
log.info("No cover art media files found");
return Ok(());
}
log.info(&format!(
"Found {} cover art media file(s), checking variants...",
rows.len()
));
let mut created = 0usize;
let mut unchanged = 0usize;
let mut missing_original = 0usize;
let mut failed = 0usize;
for (media_file_id, file_path) in rows {
let path = resolve_media_path(storage_dir, &file_path);
if !path.exists() {
missing_original += 1;
log.warn(&format!(
"Media file {media_file_id}: original cover not found at {}",
path.display()
));
continue;
}
match cover_variants::ensure_cover_variants(&path).await {
Ok(0) => unchanged += 1,
Ok(count) => {
created += count;
log.info(&format!(
"Media file {media_file_id}: created {count} variant(s)"
));
}
Err(err) => {
failed += 1;
log.warn(&format!(
"Media file {media_file_id}: failed to create variants: {err}"
));
}
}
}
log.info(&format!(
"Cover variant backfill complete: {created} variant(s) created, \
{unchanged} original(s) already complete, {missing_original} missing original(s), \
{failed} failed original(s)"
));
Ok(())
}
}
fn resolve_media_path(storage_dir: &str, file_path: &str) -> PathBuf {
let path = PathBuf::from(file_path);
if path.is_absolute() {
path
} else {
Path::new(storage_dir).join(path)
}
}
+3
View File
@@ -887,6 +887,9 @@ pub async fn finalize_approved(
crate::agent::cover_art::CoverSource::Embedded(p) => {
format!("embedded in: {}", p.display())
}
crate::agent::cover_art::CoverSource::Remote(url) => {
format!("remote: {url}")
}
};
match crate::agent::cover_art::save_cover_to_storage(
db,
+1 -4
View File
@@ -1,7 +1,4 @@
pub mod artist_image_backfill;
pub mod artist_track_image_backfill;
pub mod cover_backfill;
pub mod cover_variant_backfill;
pub mod artwork_backfill;
pub mod inbox_discover;
pub mod inbox_process;
pub mod lastfm_popularity;
+1 -4
View File
@@ -50,10 +50,7 @@ fn build_registry() -> Arc<JobRegistry> {
registry.register(jobs::inbox_discover::InboxDiscoverJob);
registry.register(jobs::inbox_process::InboxProcessJob);
registry.register(jobs::inbox_process::FileProcessJob);
registry.register(jobs::cover_backfill::CoverBackfillJob);
registry.register(jobs::artist_image_backfill::ArtistImageBackfillJob);
registry.register(jobs::artist_track_image_backfill::ArtistTrackImageBackfillJob);
registry.register(jobs::cover_variant_backfill::CoverVariantBackfillJob);
registry.register(jobs::artwork_backfill::ArtworkBackfillJob);
registry.register(jobs::metadata_backfill::MetadataBackfillJob);
registry.register(jobs::lastfm_popularity::LastfmPopularityJob);
registry.register(jobs::lastfm_scrobble::LastfmScrobbleJob);
+47
View File
@@ -1769,6 +1769,52 @@ pub mod db_migrations {
&[Operation::custom(create_lastfm_scrobbling).build()];
}
// -- M0034: Artwork lookup state --------------------------------------
#[cot::db::migrations::migration_op]
async fn create_artwork_lookup_state(
ctx: migrations::MigrationContext<'_>,
) -> cot::db::Result<()> {
ctx.db
.raw(
"CREATE TABLE IF NOT EXISTS furumusic__artwork_lookup_state (
id BIGSERIAL PRIMARY KEY,
entity_kind VARCHAR(32) NOT NULL,
entity_id BIGINT NOT NULL,
source VARCHAR(32) NOT NULL,
status VARCHAR(32) NOT NULL,
attempt_count INTEGER NOT NULL DEFAULT 0,
last_attempt_at VARCHAR(32) NOT NULL,
last_error TEXT,
source_url TEXT,
UNIQUE(entity_kind, entity_id, source)
)",
)
.await?;
ctx.db
.raw(
"CREATE INDEX IF NOT EXISTS idx_artwork_lookup_state_retry
ON furumusic__artwork_lookup_state (entity_kind, source, status, last_attempt_at)",
)
.await?;
Ok(())
}
#[derive(Debug, Copy, Clone)]
pub struct M0034CreateArtworkLookupState;
impl migrations::Migration for M0034CreateArtworkLookupState {
const APP_NAME: &'static str = "furumusic";
const MIGRATION_NAME: &'static str = "m_0034_create_artwork_lookup_state";
const DEPENDENCIES: &'static [migrations::MigrationDependency] =
&[migrations::MigrationDependency::migration(
"furumusic",
"m_0033_create_lastfm_scrobbling",
)];
const OPERATIONS: &'static [Operation] =
&[Operation::custom(create_artwork_lookup_state).build()];
}
pub const MIGRATIONS: &[&SyncDynMigration] = &[
&M0006CreateMediaFile,
&M0007CreateArtist,
@@ -1793,5 +1839,6 @@ pub mod db_migrations {
&M0031CreateTorrentSession,
&M0032CreateLastfmTrackPopularity,
&M0033CreateLastfmScrobbling,
&M0034CreateArtworkLookupState,
];
}
+141 -86
View File
@@ -460,6 +460,98 @@ async fn update_lastfm_account_error(
Ok(())
}
async fn enqueue_lastfm_scrobble(
pool: &sqlx::PgPool,
config: &AppConfig,
user_id: i64,
track_id: i64,
started_at: Option<i64>,
listened_seconds: i32,
) -> cot::Result<LastfmActionResponse> {
if !crate::lastfm::is_configured(config) {
return Ok(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Last.fm is not configured".to_string()),
});
}
if load_lastfm_account(pool, user_id).await?.is_none() {
return Ok(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Last.fm account is not connected".to_string()),
});
}
let Some(track) = load_lastfm_track_payload(pool, track_id).await? else {
return Ok(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Track has no primary artist for Last.fm".to_string()),
});
};
let duration_seconds = track.duration_seconds.unwrap_or(0).max(0);
if duration_seconds <= 30 {
return Ok(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Track is too short to scrobble".to_string()),
});
}
let threshold = ((duration_seconds as f64 / 2.0).min(240.0)).ceil() as i32;
let listened_seconds = listened_seconds.max(0);
if listened_seconds < threshold {
return Ok(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Scrobble threshold has not been reached".to_string()),
});
}
let now_ts = chrono::Utc::now().timestamp();
let started_at = started_at
.unwrap_or(now_ts - listened_seconds as i64)
.min(now_ts);
let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
let dedupe_key = format!("{user_id}:{track_id}:{started_at}");
sqlx::query(
r#"INSERT INTO furumusic__lastfm_scrobble_outbox
(user_id, track_id, started_at, listened_seconds, duration_seconds, status, created_at, updated_at, dedupe_key)
VALUES ($1, $2, $3, $4, $5, 'pending', $6, $6, $7)
ON CONFLICT (dedupe_key) DO NOTHING"#,
)
.bind(user_id)
.bind(track_id)
.bind(started_at)
.bind(listened_seconds)
.bind(duration_seconds)
.bind(&now)
.bind(&dedupe_key)
.execute(pool)
.await
.map_err(|e| cot::Error::internal(e.to_string()))?;
let sent = match crate::lastfm::process_pending_scrobbles(pool, config, Some(user_id), 10).await
{
Ok(summary) => summary.sent > 0,
Err(err) => {
tracing::warn!("Last.fm immediate scrobble send failed: {err:#}");
false
}
};
Ok(LastfmActionResponse {
ok: true,
queued: true,
sent,
message: None,
})
}
async fn lastfm_now_playing_handler(
session: Session,
db: Database,
@@ -542,93 +634,17 @@ async fn lastfm_scrobble_handler(
return Ok(json_error(StatusCode::UNAUTHORIZED, "not authenticated"));
};
let (config, _) = AppConfig::load_with_db(&db).await;
if !crate::lastfm::is_configured(&config) {
return Json(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Last.fm is not configured".to_string()),
})
.into_response();
}
if load_lastfm_account(pool, user.id).await?.is_none() {
return Json(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Last.fm account is not connected".to_string()),
})
.into_response();
}
let Some(track) = load_lastfm_track_payload(pool, entry.track_id).await? else {
return Json(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Track has no primary artist for Last.fm".to_string()),
})
.into_response();
};
let duration_seconds = track.duration_seconds.unwrap_or(0).max(0);
if duration_seconds <= 30 {
return Json(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Track is too short to scrobble".to_string()),
})
.into_response();
}
let threshold = ((duration_seconds as f64 / 2.0).min(240.0)).ceil() as i32;
let listened_seconds = entry.listened_seconds.max(0);
if listened_seconds < threshold {
return Json(LastfmActionResponse {
ok: false,
queued: false,
sent: false,
message: Some("Scrobble threshold has not been reached".to_string()),
})
.into_response();
}
let now_ts = chrono::Utc::now().timestamp();
let started_at = entry
.started_at
.unwrap_or(now_ts - listened_seconds as i64)
.min(now_ts);
let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
let dedupe_key = format!("{}:{}:{}", user.id, entry.track_id, started_at);
sqlx::query(
r#"INSERT INTO furumusic__lastfm_scrobble_outbox
(user_id, track_id, started_at, listened_seconds, duration_seconds, status, created_at, updated_at, dedupe_key)
VALUES ($1, $2, $3, $4, $5, 'pending', $6, $6, $7)
ON CONFLICT (dedupe_key) DO NOTHING"#,
Json(
enqueue_lastfm_scrobble(
pool,
&config,
user.id,
entry.track_id,
entry.started_at,
entry.listened_seconds,
)
.await?,
)
.bind(user.id)
.bind(entry.track_id)
.bind(started_at)
.bind(listened_seconds)
.bind(duration_seconds)
.bind(&now)
.bind(&dedupe_key)
.execute(pool)
.await
.map_err(|e| cot::Error::internal(e.to_string()))?;
let sent =
match crate::lastfm::process_pending_scrobbles(pool, &config, Some(user.id), 10).await {
Ok(summary) => summary.sent > 0,
Err(err) => {
tracing::warn!("Last.fm immediate scrobble send failed: {err:#}");
false
}
};
Json(LastfmActionResponse {
ok: true,
queued: true,
sent,
message: None,
})
.into_response()
}
@@ -1944,6 +1960,45 @@ async fn history_handler(
.await
.map_err(|e| cot::Error::internal(e.to_string()))?;
if let Some(listened_seconds) = entry.duration_listened {
let (config, _) = AppConfig::load_with_db(&db).await;
match enqueue_lastfm_scrobble(
pool,
&config,
user.id,
entry.track_id,
entry.started_at,
listened_seconds,
)
.await
{
Ok(result) if result.queued => {
tracing::info!(
user_id = user.id,
track_id = entry.track_id,
sent = result.sent,
"Queued Last.fm scrobble from play history"
);
}
Ok(result) => {
tracing::debug!(
user_id = user.id,
track_id = entry.track_id,
message = ?result.message,
"Play history did not queue Last.fm scrobble"
);
}
Err(err) => {
tracing::warn!(
user_id = user.id,
track_id = entry.track_id,
error = %err,
"Failed to queue Last.fm scrobble from play history"
);
}
}
}
Json(serde_json::json!({"ok": true})).into_response()
}
+1
View File
@@ -3,6 +3,7 @@ use serde::Deserialize;
#[derive(Debug, Deserialize)]
pub(super) struct HistoryEntry {
pub(super) track_id: i64,
pub(super) started_at: Option<i64>,
pub(super) duration_listened: Option<i32>,
pub(super) completed: bool,
}
+1
View File
@@ -1360,6 +1360,7 @@ async fn run_scheduled_job(
if !live_config.agent_enabled
&& job_name != "lastfm_popularity"
&& job_name != "lastfm_scrobble"
&& job_name != "artwork_backfill"
{
tracing::warn!(job = job_name, "Skipping: agent_enabled=false");
return;