This commit is contained in:
@@ -48,7 +48,9 @@ impl Job for ArtistImageBackfillJob {
|
||||
|
||||
let count = result.rows_affected();
|
||||
if count > 0 {
|
||||
log.info(&format!("Assigned images to {count} artists from release covers"));
|
||||
log.info(&format!(
|
||||
"Assigned images to {count} artists from release covers"
|
||||
));
|
||||
} else {
|
||||
log.info("All artists already have images (or no covers available)");
|
||||
}
|
||||
|
||||
@@ -87,10 +87,8 @@ impl Job for CoverBackfillJob {
|
||||
let folder = first_path.parent().unwrap_or(Path::new("."));
|
||||
|
||||
// Collect all audio file paths as PathBuf
|
||||
let audio_files: Vec<PathBuf> = audio_paths
|
||||
.iter()
|
||||
.map(|(p,)| PathBuf::from(p))
|
||||
.collect();
|
||||
let audio_files: Vec<PathBuf> =
|
||||
audio_paths.iter().map(|(p,)| PathBuf::from(p)).collect();
|
||||
|
||||
// Try to find cover art
|
||||
let cover = match cover_art::find_best_cover(folder, &audio_files).await {
|
||||
@@ -135,12 +133,9 @@ impl Job for CoverBackfillJob {
|
||||
.await
|
||||
{
|
||||
Ok(cover_file_id) => {
|
||||
if let Err(e) = cover_art::assign_cover_to_release(
|
||||
&ctx.pool,
|
||||
*release_id,
|
||||
cover_file_id,
|
||||
)
|
||||
.await
|
||||
if let Err(e) =
|
||||
cover_art::assign_cover_to_release(&ctx.pool, *release_id, cover_file_id)
|
||||
.await
|
||||
{
|
||||
log.warn(&format!(
|
||||
"Release {release_id} \"{release_title}\": saved cover but failed to assign: {e}"
|
||||
|
||||
+45
-26
@@ -30,7 +30,10 @@ impl Job for InboxDiscoverJob {
|
||||
|
||||
async fn run(&self, ctx: &JobContext, log: &mut JobLog) -> anyhow::Result<()> {
|
||||
// Prevent overlapping discover runs
|
||||
if DISCOVER_RUNNING.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst).is_err() {
|
||||
if DISCOVER_RUNNING
|
||||
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
|
||||
.is_err()
|
||||
{
|
||||
log.info("Another inbox_discover is already running, skipping");
|
||||
return Ok(());
|
||||
}
|
||||
@@ -82,31 +85,38 @@ impl Job for InboxDiscoverJob {
|
||||
}
|
||||
Ok(false) => {}
|
||||
Err(e) => {
|
||||
log.warn(&format!("Error checking existing review for {}: {e}", input_path_str));
|
||||
log.warn(&format!(
|
||||
"Error checking existing review for {}: {e}",
|
||||
input_path_str
|
||||
));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute SHA-256 hash
|
||||
let path_clone = file_path.to_path_buf();
|
||||
let (hash, file_size) = match tokio::task::spawn_blocking(move || -> anyhow::Result<(String, i64)> {
|
||||
let data = std::fs::read(&path_clone)?;
|
||||
let digest = Sha256::digest(&data);
|
||||
let hash = format!("{:x}", digest);
|
||||
let size = data.len() as i64;
|
||||
Ok((hash, size))
|
||||
})
|
||||
.await?
|
||||
{
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
log.warn(&format!("Failed to hash {}: {e}", file_path.display()));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let (hash, file_size) =
|
||||
match tokio::task::spawn_blocking(move || -> anyhow::Result<(String, i64)> {
|
||||
let data = std::fs::read(&path_clone)?;
|
||||
let digest = Sha256::digest(&data);
|
||||
let hash = format!("{:x}", digest);
|
||||
let size = data.len() as i64;
|
||||
Ok((hash, size))
|
||||
})
|
||||
.await?
|
||||
{
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
log.warn(&format!("Failed to hash {}: {e}", file_path.display()));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Skip if hash already in media_files
|
||||
if crate::agent::rag::file_hash_exists(&ctx.pool, &hash).await.unwrap_or(false) {
|
||||
if crate::agent::rag::file_hash_exists(&ctx.pool, &hash)
|
||||
.await
|
||||
.unwrap_or(false)
|
||||
{
|
||||
skipped_hash += 1;
|
||||
continue;
|
||||
}
|
||||
@@ -120,7 +130,10 @@ impl Job for InboxDiscoverJob {
|
||||
{
|
||||
Ok(m) => m,
|
||||
Err(e) => {
|
||||
log.warn(&format!("Failed to extract metadata from {}: {e}", file_path.display()));
|
||||
log.warn(&format!(
|
||||
"Failed to extract metadata from {}: {e}",
|
||||
file_path.display()
|
||||
));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
@@ -140,6 +153,9 @@ impl Job for InboxDiscoverJob {
|
||||
"raw_year": raw_meta.year,
|
||||
"raw_genre": raw_meta.genre,
|
||||
"duration_secs": raw_meta.duration_secs,
|
||||
"audio_bitrate": raw_meta.audio_bitrate,
|
||||
"audio_sample_rate": raw_meta.audio_sample_rate,
|
||||
"audio_bit_depth": raw_meta.audio_bit_depth,
|
||||
"path_title": hints.title,
|
||||
"path_artist": hints.artist,
|
||||
"path_album": hints.album,
|
||||
@@ -172,7 +188,9 @@ impl Job for InboxDiscoverJob {
|
||||
// and no orchestrator is already running
|
||||
if discovered > 0 {
|
||||
if crate::jobs::inbox_process::is_orchestrator_running() {
|
||||
log.info("New files discovered but inbox_process already running, it will pick them up");
|
||||
log.info(
|
||||
"New files discovered but inbox_process already running, it will pick them up",
|
||||
);
|
||||
} else {
|
||||
log.info("Spawning inbox_process in background...");
|
||||
let config = ctx.config.clone();
|
||||
@@ -181,11 +199,15 @@ impl Job for InboxDiscoverJob {
|
||||
let registry = ctx.registry.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = crate::scheduler::trigger_job_now(
|
||||
&config, &db, &pool, ®istry, "inbox_process",
|
||||
&config,
|
||||
&db,
|
||||
&pool,
|
||||
®istry,
|
||||
"inbox_process",
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::error!("Background inbox_process trigger failed: {e}");
|
||||
tracing::error!("Background inbox_process trigger failed: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -214,10 +236,7 @@ pub fn group_by_folder(files: &[PathBuf]) -> Vec<(PathBuf, Vec<PathBuf>)> {
|
||||
groups
|
||||
}
|
||||
|
||||
pub async fn collect_audio_files(
|
||||
dir: &Path,
|
||||
audio: &mut Vec<PathBuf>,
|
||||
) -> anyhow::Result<()> {
|
||||
pub async fn collect_audio_files(dir: &Path, audio: &mut Vec<PathBuf>) -> anyhow::Result<()> {
|
||||
let mut entries = tokio::fs::read_dir(dir).await?;
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let name = entry.file_name().to_string_lossy().into_owned();
|
||||
|
||||
+136
-85
@@ -20,12 +20,10 @@ pub fn is_orchestrator_running() -> bool {
|
||||
/// Try to acquire the PostgreSQL advisory lock for the orchestrator.
|
||||
/// Returns true if the lock was acquired (no other orchestrator is running).
|
||||
async fn try_acquire_orchestrator_lock(pool: &sqlx::PgPool) -> bool {
|
||||
match sqlx::query_scalar::<_, bool>(
|
||||
"SELECT pg_try_advisory_lock($1)"
|
||||
)
|
||||
.bind(ORCHESTRATOR_ADVISORY_LOCK_ID)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
match sqlx::query_scalar::<_, bool>("SELECT pg_try_advisory_lock($1)")
|
||||
.bind(ORCHESTRATOR_ADVISORY_LOCK_ID)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
{
|
||||
Ok(acquired) => acquired,
|
||||
Err(e) => {
|
||||
@@ -43,14 +41,12 @@ async fn release_orchestrator_lock(pool: &sqlx::PgPool) {
|
||||
.await;
|
||||
}
|
||||
|
||||
use crate::config::AppConfig;
|
||||
use crate::music::{
|
||||
Artist, MediaFile, Release, ReleaseArtist, Track, TrackArtist,
|
||||
};
|
||||
use crate::scheduler::{Job, JobContext, JobLog, JobRun, PendingReview, ProcessingStats};
|
||||
use crate::agent::dto::{FolderContext, NormalizedFields, RawMetadata, PathHints};
|
||||
use crate::agent::normalize::BatchFileInput;
|
||||
use crate::agent::dto::{FolderContext, NormalizedFields, PathHints, RawMetadata};
|
||||
use crate::agent::mover;
|
||||
use crate::agent::normalize::BatchFileInput;
|
||||
use crate::config::AppConfig;
|
||||
use crate::music::{Artist, MediaFile, Release, ReleaseArtist, Track, TrackArtist};
|
||||
use crate::scheduler::{Job, JobContext, JobLog, JobRun, PendingReview, ProcessingStats};
|
||||
|
||||
const AUDIO_EXTENSIONS: &[&str] = &[
|
||||
"mp3", "flac", "ogg", "opus", "aac", "m4a", "wav", "ape", "wv", "wma", "tta", "aiff", "aif",
|
||||
@@ -83,8 +79,13 @@ impl Job for InboxProcessJob {
|
||||
previous_value = prev,
|
||||
"inbox_process: checking ORCHESTRATOR_RUNNING AtomicBool"
|
||||
);
|
||||
if ORCHESTRATOR_RUNNING.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst).is_err() {
|
||||
log.info("Another inbox_process orchestrator is already running (AtomicBool), skipping");
|
||||
if ORCHESTRATOR_RUNNING
|
||||
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
|
||||
.is_err()
|
||||
{
|
||||
log.info(
|
||||
"Another inbox_process orchestrator is already running (AtomicBool), skipping",
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
struct AtomicGuard;
|
||||
@@ -115,7 +116,9 @@ impl Job for InboxProcessJob {
|
||||
});
|
||||
}
|
||||
}
|
||||
let _advisory_guard = AdvisoryGuard { pool: pool_for_unlock };
|
||||
let _advisory_guard = AdvisoryGuard {
|
||||
pool: pool_for_unlock,
|
||||
};
|
||||
|
||||
let config = Arc::clone(&ctx.config);
|
||||
let mut total_ok = 0u64;
|
||||
@@ -151,9 +154,9 @@ impl Job for InboxProcessJob {
|
||||
folder_rel, file_count,
|
||||
));
|
||||
|
||||
let (ok, fail) = process_folder_batch(
|
||||
&ctx.db, &config, &ctx.pool, &folder_rel, reviews, log,
|
||||
).await;
|
||||
let (ok, fail) =
|
||||
process_folder_batch(&ctx.db, &config, &ctx.pool, &folder_rel, reviews, log)
|
||||
.await;
|
||||
|
||||
total_ok += ok;
|
||||
total_fail += fail;
|
||||
@@ -296,7 +299,7 @@ async fn process_folder_batch(
|
||||
let _ = review.set_processing(db).await;
|
||||
|
||||
// Parse context_json
|
||||
let context: serde_json::Value = review
|
||||
let mut context: serde_json::Value = review
|
||||
.context_json
|
||||
.as_deref()
|
||||
.and_then(|s| serde_json::from_str(s).ok())
|
||||
@@ -304,40 +307,51 @@ async fn process_folder_batch(
|
||||
|
||||
// Extract metadata (with 60s timeout)
|
||||
let path_for_meta = file_path.to_path_buf();
|
||||
let meta_future = tokio::task::spawn_blocking(move || {
|
||||
crate::agent::metadata::extract(&path_for_meta)
|
||||
});
|
||||
let raw_meta = match tokio::time::timeout(
|
||||
std::time::Duration::from_secs(60),
|
||||
meta_future,
|
||||
).await {
|
||||
Ok(Ok(Ok(m))) => m,
|
||||
Ok(Ok(Err(e))) => {
|
||||
let msg = format!("{filename}: metadata error: {e}");
|
||||
log.error(&msg);
|
||||
let _ = review.set_failed(db, &msg).await;
|
||||
failed_reviews.push(review);
|
||||
continue;
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
let msg = format!("{filename}: metadata panic: {e}");
|
||||
log.error(&msg);
|
||||
let _ = review.set_failed(db, &msg).await;
|
||||
failed_reviews.push(review);
|
||||
continue;
|
||||
}
|
||||
Err(_) => {
|
||||
let msg = format!("{filename}: metadata timeout (60s)");
|
||||
log.error(&msg);
|
||||
let _ = review.set_failed(db, &msg).await;
|
||||
failed_reviews.push(review);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let meta_future =
|
||||
tokio::task::spawn_blocking(move || crate::agent::metadata::extract(&path_for_meta));
|
||||
let raw_meta =
|
||||
match tokio::time::timeout(std::time::Duration::from_secs(60), meta_future).await {
|
||||
Ok(Ok(Ok(m))) => m,
|
||||
Ok(Ok(Err(e))) => {
|
||||
let msg = format!("{filename}: metadata error: {e}");
|
||||
log.error(&msg);
|
||||
let _ = review.set_failed(db, &msg).await;
|
||||
failed_reviews.push(review);
|
||||
continue;
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
let msg = format!("{filename}: metadata panic: {e}");
|
||||
log.error(&msg);
|
||||
let _ = review.set_failed(db, &msg).await;
|
||||
failed_reviews.push(review);
|
||||
continue;
|
||||
}
|
||||
Err(_) => {
|
||||
let msg = format!("{filename}: metadata timeout (60s)");
|
||||
log.error(&msg);
|
||||
let _ = review.set_failed(db, &msg).await;
|
||||
failed_reviews.push(review);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Parse path hints
|
||||
let relative = file_path.strip_prefix(inbox_path).unwrap_or(file_path);
|
||||
let hints = crate::agent::path_hints::parse(relative);
|
||||
if let Some(context_obj) = context.as_object_mut() {
|
||||
context_obj.insert(
|
||||
"audio_bitrate".to_owned(),
|
||||
serde_json::json!(raw_meta.audio_bitrate),
|
||||
);
|
||||
context_obj.insert(
|
||||
"audio_sample_rate".to_owned(),
|
||||
serde_json::json!(raw_meta.audio_sample_rate),
|
||||
);
|
||||
context_obj.insert(
|
||||
"audio_bit_depth".to_owned(),
|
||||
serde_json::json!(raw_meta.audio_bit_depth),
|
||||
);
|
||||
}
|
||||
|
||||
prepared.push(PreparedFile {
|
||||
review,
|
||||
@@ -366,14 +380,20 @@ async fn process_folder_batch(
|
||||
let mut album_queries: Vec<String> = Vec::new();
|
||||
|
||||
for p in &prepared {
|
||||
let artist_q = p.raw_meta.artist.as_deref()
|
||||
let artist_q = p
|
||||
.raw_meta
|
||||
.artist
|
||||
.as_deref()
|
||||
.or(p.hints.artist.as_deref())
|
||||
.unwrap_or("")
|
||||
.to_owned();
|
||||
if !artist_q.is_empty() && !artist_queries.contains(&artist_q) {
|
||||
artist_queries.push(artist_q);
|
||||
}
|
||||
let album_q = p.raw_meta.album.as_deref()
|
||||
let album_q = p
|
||||
.raw_meta
|
||||
.album
|
||||
.as_deref()
|
||||
.or(p.hints.album.as_deref())
|
||||
.unwrap_or("")
|
||||
.to_owned();
|
||||
@@ -388,10 +408,15 @@ async fn process_folder_batch(
|
||||
match tokio::time::timeout(
|
||||
std::time::Duration::from_secs(30),
|
||||
crate::agent::rag::find_similar_artists(pool, q, 5),
|
||||
).await {
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(results)) => {
|
||||
for a in results {
|
||||
if !all_similar_artists.iter().any(|x: &crate::agent::dto::SimilarArtist| x.id == a.id) {
|
||||
if !all_similar_artists
|
||||
.iter()
|
||||
.any(|x: &crate::agent::dto::SimilarArtist| x.id == a.id)
|
||||
{
|
||||
all_similar_artists.push(a);
|
||||
}
|
||||
}
|
||||
@@ -406,10 +431,15 @@ async fn process_folder_batch(
|
||||
match tokio::time::timeout(
|
||||
std::time::Duration::from_secs(30),
|
||||
crate::agent::rag::find_similar_releases(pool, q, 5),
|
||||
).await {
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(results)) => {
|
||||
for r in results {
|
||||
if !all_similar_releases.iter().any(|x: &crate::agent::dto::SimilarRelease| x.id == r.id) {
|
||||
if !all_similar_releases
|
||||
.iter()
|
||||
.any(|x: &crate::agent::dto::SimilarRelease| x.id == r.id)
|
||||
{
|
||||
all_similar_releases.push(r);
|
||||
}
|
||||
}
|
||||
@@ -458,8 +488,9 @@ async fn process_folder_batch(
|
||||
};
|
||||
|
||||
// Build batch input
|
||||
let batch_files: Vec<BatchFileInput> = prepared.iter().map(|p| {
|
||||
BatchFileInput {
|
||||
let batch_files: Vec<BatchFileInput> = prepared
|
||||
.iter()
|
||||
.map(|p| BatchFileInput {
|
||||
filename: p.filename.clone(),
|
||||
raw: RawMetadata {
|
||||
title: p.raw_meta.title.clone(),
|
||||
@@ -469,6 +500,9 @@ async fn process_folder_batch(
|
||||
year: p.raw_meta.year,
|
||||
genre: p.raw_meta.genre.clone(),
|
||||
duration_secs: p.raw_meta.duration_secs,
|
||||
audio_bitrate: p.raw_meta.audio_bitrate,
|
||||
audio_sample_rate: p.raw_meta.audio_sample_rate,
|
||||
audio_bit_depth: p.raw_meta.audio_bit_depth,
|
||||
},
|
||||
hints: PathHints {
|
||||
title: p.hints.title.clone(),
|
||||
@@ -477,8 +511,8 @@ async fn process_folder_batch(
|
||||
year: p.hints.year,
|
||||
track_number: p.hints.track_number,
|
||||
},
|
||||
}
|
||||
}).collect();
|
||||
})
|
||||
.collect();
|
||||
|
||||
let system_prompt = include_str!("../../prompts/normalize_batch.txt");
|
||||
let context_limit = config.agent_context_limit;
|
||||
@@ -493,7 +527,8 @@ async fn process_folder_batch(
|
||||
&all_similar_artists,
|
||||
&all_similar_releases,
|
||||
Some(&folder_ctx),
|
||||
).await;
|
||||
)
|
||||
.await;
|
||||
|
||||
let batch_result = match llm_result {
|
||||
Ok(r) => r,
|
||||
@@ -506,7 +541,9 @@ async fn process_folder_batch(
|
||||
}
|
||||
let total_fail_count = failed_reviews.len() as u64 + file_count as u64;
|
||||
let duration_ms = batch_start.elapsed().as_millis() as i64;
|
||||
let _ = run.set_failed(db, duration_ms, &log.output(), &err_msg).await;
|
||||
let _ = run
|
||||
.set_failed(db, duration_ms, &log.output(), &err_msg)
|
||||
.await;
|
||||
return (0, total_fail_count);
|
||||
}
|
||||
};
|
||||
@@ -524,9 +561,7 @@ async fn process_folder_batch(
|
||||
log.info("Phase 4: finalizing...");
|
||||
|
||||
// Build lookup map: filename → NormalizedFields
|
||||
let result_map: HashMap<String, NormalizedFields> = batch_result.results
|
||||
.into_iter()
|
||||
.collect();
|
||||
let result_map: HashMap<String, NormalizedFields> = batch_result.results.into_iter().collect();
|
||||
|
||||
let llm_model = &batch_result.model;
|
||||
let prompt_per_file = batch_result.prompt_tokens / prepared.len().max(1) as u64;
|
||||
@@ -558,7 +593,8 @@ async fn process_folder_batch(
|
||||
duration_per_file,
|
||||
prompt_per_file as i64,
|
||||
completion_per_file as i64,
|
||||
).await;
|
||||
)
|
||||
.await;
|
||||
|
||||
let result_json = serde_json::to_string(normalized).unwrap_or_default();
|
||||
let confidence = normalized.confidence.unwrap_or(0.0);
|
||||
@@ -573,7 +609,9 @@ async fn process_folder_batch(
|
||||
normalized.artist.as_deref().unwrap_or("-"),
|
||||
normalized.album.as_deref().unwrap_or("-"),
|
||||
normalized.title.as_deref().unwrap_or("-"),
|
||||
normalized.track_number.map_or("-".into(), |n| n.to_string()),
|
||||
normalized
|
||||
.track_number
|
||||
.map_or("-".into(), |n| n.to_string()),
|
||||
normalized.year.map_or("-".into(), |y| y.to_string()),
|
||||
confidence,
|
||||
feat,
|
||||
@@ -586,9 +624,17 @@ async fn process_folder_batch(
|
||||
|
||||
if confidence >= config.agent_confidence_threshold {
|
||||
match finalize_approved(
|
||||
db, pool, config, &input_path_str, normalized, &p.context,
|
||||
&config.agent_storage_dir, Some(llm_model),
|
||||
).await {
|
||||
db,
|
||||
pool,
|
||||
config,
|
||||
&input_path_str,
|
||||
normalized,
|
||||
&p.context,
|
||||
&config.agent_storage_dir,
|
||||
Some(llm_model),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
let _ = p.review.set_auto_approved(db).await;
|
||||
ok_count += 1;
|
||||
@@ -604,7 +650,8 @@ async fn process_folder_batch(
|
||||
p.review.status = cot::db::LimitedString::new("pending").unwrap();
|
||||
p.review.updated_at = cot::db::LimitedString::new(
|
||||
&chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string(),
|
||||
).unwrap();
|
||||
)
|
||||
.unwrap();
|
||||
let _ = p.review.save(db).await;
|
||||
log.info(&format!(
|
||||
"{filename}: manual review (confidence {confidence} < {})",
|
||||
@@ -669,10 +716,7 @@ pub async fn finalize_approved(
|
||||
.map_err(|e| anyhow::anyhow!("failed to link release-artist: {e}"))?;
|
||||
}
|
||||
|
||||
let sha256 = context
|
||||
.get("sha256")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
let sha256 = context.get("sha256").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_size = context
|
||||
.get("file_size")
|
||||
.and_then(|v| v.as_i64())
|
||||
@@ -681,6 +725,18 @@ pub async fn finalize_approved(
|
||||
.get("duration_secs")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
let audio_bitrate = context
|
||||
.get("audio_bitrate")
|
||||
.and_then(|v| v.as_i64())
|
||||
.and_then(|v| i32::try_from(v).ok());
|
||||
let audio_sample_rate = context
|
||||
.get("audio_sample_rate")
|
||||
.and_then(|v| v.as_i64())
|
||||
.and_then(|v| i32::try_from(v).ok());
|
||||
let audio_bit_depth = context
|
||||
.get("audio_bit_depth")
|
||||
.and_then(|v| v.as_i64())
|
||||
.and_then(|v| i32::try_from(v).ok());
|
||||
|
||||
let source_path = Path::new(input_path_str);
|
||||
let original_filename = source_path
|
||||
@@ -746,9 +802,9 @@ pub async fn finalize_approved(
|
||||
file_size,
|
||||
sha256,
|
||||
Some(ext),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
audio_bitrate,
|
||||
audio_sample_rate,
|
||||
audio_bit_depth,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("failed to create media file: {e}"))?;
|
||||
@@ -785,9 +841,7 @@ pub async fn finalize_approved(
|
||||
|
||||
// Cover art: if the release has no cover yet, try to find one
|
||||
if release.cover_file_id.is_none() {
|
||||
let source_folder = Path::new(input_path_str)
|
||||
.parent()
|
||||
.unwrap_or(Path::new("."));
|
||||
let source_folder = Path::new(input_path_str).parent().unwrap_or(Path::new("."));
|
||||
|
||||
// Collect audio files in the same folder to try embedded extraction
|
||||
let audio_files_in_folder: Vec<std::path::PathBuf> = std::fs::read_dir(source_folder)
|
||||
@@ -955,10 +1009,7 @@ fn truncate_path(path: &str, max_len: usize) -> String {
|
||||
} else if max_len <= 3 {
|
||||
".".repeat(max_len)
|
||||
} else {
|
||||
let suffix: String = path
|
||||
.chars()
|
||||
.skip(char_count - (max_len - 3))
|
||||
.collect();
|
||||
let suffix: String = path.chars().skip(char_count - (max_len - 3)).collect();
|
||||
format!("...{suffix}")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,234 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::scheduler::{Job, JobContext, JobLog};
|
||||
|
||||
/// Selects which technical metadata fields a backfill run is allowed to write
/// and whether values that are already stored may be replaced.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct MetadataBackfillOptions {
    /// Backfill the `audio_bitrate` column of media files.
    pub audio_bitrate: bool,
    /// Backfill the `audio_sample_rate` column of media files.
    pub audio_sample_rate: bool,
    /// Backfill the `audio_bit_depth` column of media files.
    pub audio_bit_depth: bool,
    /// Backfill the `duration_seconds` column of the track linked to a media file.
    pub duration_seconds: bool,
    /// When `true`, stored values are replaced; when `false`, only missing values are filled.
    pub overwrite: bool,
}

impl MetadataBackfillOptions {
    /// Returns `true` when at least one metadata field is selected.
    ///
    /// `overwrite` is a mode rather than a field, so it does not count.
    pub fn any_field(self) -> bool {
        self.audio_bitrate
            || self.audio_sample_rate
            || self.audio_bit_depth
            || self.duration_seconds
    }
}
|
||||
|
||||
#[derive(sqlx::FromRow)]
|
||||
struct BackfillRow {
|
||||
media_file_id: i64,
|
||||
file_path: String,
|
||||
audio_bitrate: Option<i32>,
|
||||
audio_sample_rate: Option<i32>,
|
||||
audio_bit_depth: Option<i32>,
|
||||
track_id: Option<i64>,
|
||||
duration_seconds: Option<f64>,
|
||||
}
|
||||
|
||||
pub struct MetadataBackfillJob;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Job for MetadataBackfillJob {
|
||||
fn name(&self) -> &'static str {
|
||||
"metadata_backfill"
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Backfill technical audio metadata from existing files"
|
||||
}
|
||||
|
||||
fn default_cron(&self) -> &'static str {
|
||||
""
|
||||
}
|
||||
|
||||
async fn run(&self, ctx: &JobContext, log: &mut JobLog) -> anyhow::Result<()> {
|
||||
run_with_options(
|
||||
ctx,
|
||||
log,
|
||||
MetadataBackfillOptions {
|
||||
audio_bitrate: true,
|
||||
audio_sample_rate: true,
|
||||
audio_bit_depth: true,
|
||||
duration_seconds: true,
|
||||
overwrite: false,
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run_with_options(
|
||||
ctx: &JobContext,
|
||||
log: &mut JobLog,
|
||||
options: MetadataBackfillOptions,
|
||||
) -> anyhow::Result<()> {
|
||||
if !options.any_field() {
|
||||
log.warn("No metadata fields selected; nothing to backfill");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let rows = sqlx::query_as::<_, BackfillRow>(
|
||||
"SELECT mf.id AS media_file_id, mf.file_path, \
|
||||
mf.audio_bitrate, mf.audio_sample_rate, mf.audio_bit_depth, \
|
||||
t.id AS track_id, t.duration_seconds \
|
||||
FROM furumusic__media_file mf \
|
||||
LEFT JOIN furumusic__track t ON t.audio_file_id = mf.id \
|
||||
WHERE mf.file_type = 'audio' \
|
||||
ORDER BY mf.id",
|
||||
)
|
||||
.fetch_all(&ctx.pool)
|
||||
.await?;
|
||||
|
||||
log.info(&format!(
|
||||
"Metadata backfill started: {} audio file(s), mode={}",
|
||||
rows.len(),
|
||||
if options.overwrite {
|
||||
"overwrite"
|
||||
} else {
|
||||
"fill_missing"
|
||||
}
|
||||
));
|
||||
|
||||
let mut scanned = 0u64;
|
||||
let mut media_updated = 0u64;
|
||||
let mut track_updated = 0u64;
|
||||
let mut unchanged = 0u64;
|
||||
let mut missing = 0u64;
|
||||
let mut failed = 0u64;
|
||||
|
||||
for row in rows {
|
||||
scanned += 1;
|
||||
let Some(path) = resolve_media_path(&row.file_path, &ctx.config.agent_storage_dir) else {
|
||||
missing += 1;
|
||||
log.warn(&format!("missing file: {}", row.file_path));
|
||||
continue;
|
||||
};
|
||||
|
||||
let extract_path = path.clone();
|
||||
let raw_meta = match tokio::task::spawn_blocking(move || {
|
||||
crate::agent::metadata::extract(&extract_path)
|
||||
})
|
||||
.await
|
||||
{
|
||||
Ok(Ok(meta)) => meta,
|
||||
Ok(Err(e)) => {
|
||||
failed += 1;
|
||||
log.warn(&format!("metadata error for {}: {e}", path.display()));
|
||||
continue;
|
||||
}
|
||||
Err(e) => {
|
||||
failed += 1;
|
||||
log.warn(&format!("metadata task failed for {}: {e}", path.display()));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let mut changed_media = false;
|
||||
let mut next_bitrate = row.audio_bitrate;
|
||||
let mut next_sample_rate = row.audio_sample_rate;
|
||||
let mut next_bit_depth = row.audio_bit_depth;
|
||||
|
||||
if options.audio_bitrate && should_update(row.audio_bitrate, options.overwrite) {
|
||||
if let Some(value) = raw_meta.audio_bitrate {
|
||||
next_bitrate = Some(value);
|
||||
changed_media = next_bitrate != row.audio_bitrate || changed_media;
|
||||
}
|
||||
}
|
||||
if options.audio_sample_rate && should_update(row.audio_sample_rate, options.overwrite) {
|
||||
if let Some(value) = raw_meta.audio_sample_rate {
|
||||
next_sample_rate = Some(value);
|
||||
changed_media = next_sample_rate != row.audio_sample_rate || changed_media;
|
||||
}
|
||||
}
|
||||
if options.audio_bit_depth && should_update(row.audio_bit_depth, options.overwrite) {
|
||||
if let Some(value) = raw_meta.audio_bit_depth {
|
||||
next_bit_depth = Some(value);
|
||||
changed_media = next_bit_depth != row.audio_bit_depth || changed_media;
|
||||
}
|
||||
}
|
||||
|
||||
let mut changed_track = false;
|
||||
let mut next_duration = row.duration_seconds;
|
||||
if options.duration_seconds
|
||||
&& row.track_id.is_some()
|
||||
&& should_update_duration(row.duration_seconds, options.overwrite)
|
||||
{
|
||||
if let Some(value) = raw_meta.duration_secs {
|
||||
next_duration = Some(value);
|
||||
changed_track = row
|
||||
.duration_seconds
|
||||
.map(|current| (current - value).abs() > 0.001)
|
||||
.unwrap_or(true);
|
||||
}
|
||||
}
|
||||
|
||||
if changed_media {
|
||||
sqlx::query(
|
||||
"UPDATE furumusic__media_file \
|
||||
SET audio_bitrate = $1, audio_sample_rate = $2, audio_bit_depth = $3 \
|
||||
WHERE id = $4",
|
||||
)
|
||||
.bind(next_bitrate)
|
||||
.bind(next_sample_rate)
|
||||
.bind(next_bit_depth)
|
||||
.bind(row.media_file_id)
|
||||
.execute(&ctx.pool)
|
||||
.await?;
|
||||
media_updated += 1;
|
||||
}
|
||||
|
||||
if changed_track {
|
||||
if let (Some(track_id), Some(duration)) = (row.track_id, next_duration) {
|
||||
sqlx::query("UPDATE furumusic__track SET duration_seconds = $1 WHERE id = $2")
|
||||
.bind(duration)
|
||||
.bind(track_id)
|
||||
.execute(&ctx.pool)
|
||||
.await?;
|
||||
track_updated += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if !changed_media && !changed_track {
|
||||
unchanged += 1;
|
||||
}
|
||||
|
||||
if scanned % 100 == 0 {
|
||||
log.info(&format!(
|
||||
"Progress: {scanned} scanned, {media_updated} media updated, {track_updated} tracks updated, {unchanged} unchanged, {missing} missing, {failed} failed"
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
log.info(&format!(
|
||||
"Metadata backfill complete: {scanned} scanned, {media_updated} media updated, {track_updated} tracks updated, {unchanged} unchanged, {missing} missing, {failed} failed"
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Decides whether an optional stored value may be written.
///
/// Returns `true` when `overwrite` is set or when nothing is stored yet
/// (`current` is `None`).
fn should_update<T>(current: Option<T>, overwrite: bool) -> bool {
    overwrite || current.is_none()
}
|
||||
|
||||
/// Decides whether a stored track duration may be written.
///
/// Returns `true` when `overwrite` is set or when the stored duration is not a
/// usable value: missing, zero, negative, NaN or infinite. Only a finite,
/// strictly positive duration is preserved in fill-missing mode.
fn should_update_duration(current: Option<f64>, overwrite: bool) -> bool {
    match current {
        // NaN fails every comparison, so it has to be rejected explicitly via
        // `is_finite`; a plain `<= 0.0` test would treat it as a valid duration.
        Some(seconds) if seconds.is_finite() && seconds > 0.0 => overwrite,
        _ => true,
    }
}
|
||||
|
||||
/// Locates a media file on disk.
///
/// `file_path` is tried as given first. If that is not a file and the path is
/// relative, it is tried again relative to `storage_dir` (skipped when
/// `storage_dir` is empty). Returns `None` when neither candidate is a file.
///
/// Only regular files (or symlinks to them) are accepted. Directories are
/// rejected so that an empty or directory-valued `file_path` cannot resolve to
/// the storage directory itself.
fn resolve_media_path(file_path: &str, storage_dir: &str) -> Option<PathBuf> {
    let path = Path::new(file_path);
    if path.is_file() {
        return Some(path.to_path_buf());
    }
    if path.is_relative() && !storage_dir.is_empty() {
        let joined = Path::new(storage_dir).join(path);
        if joined.is_file() {
            return Some(joined);
        }
    }
    None
}
|
||||
@@ -3,3 +3,4 @@ pub mod artist_track_image_backfill;
|
||||
pub mod cover_backfill;
|
||||
pub mod inbox_discover;
|
||||
pub mod inbox_process;
|
||||
pub mod metadata_backfill;
|
||||
|
||||
Reference in New Issue
Block a user