JOBS: fixed metadata_backfill.rs
This commit is contained in:
+157
-26
@@ -84,6 +84,12 @@ struct LastfmTagStats {
|
|||||||
failed: u64,
|
failed: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Outcome of a single Last.fm tag backfill pass (artists, releases or tracks).
///
/// Marked `#[must_use]` because dropping the value would hide a rate-limit
/// signal and let the remaining passes keep issuing requests.
#[must_use]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LastfmTagPassResult {
    /// The pass iterated over every row it selected.
    Completed,
    /// The pass stopped early because Last.fm reported its rate limit; the
    /// caller is expected to end the tag backfill for this run.
    RateLimited,
}
|
||||||
|
|
||||||
pub struct MetadataBackfillJob;
|
pub struct MetadataBackfillJob;
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
@@ -345,9 +351,21 @@ async fn backfill_lastfm_tags(
|
|||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
let mut stats = LastfmTagStats::default();
|
let mut stats = LastfmTagStats::default();
|
||||||
backfill_lastfm_artist_tags(ctx, log, &client, api_key, overwrite, &mut stats).await?;
|
if backfill_lastfm_artist_tags(ctx, log, &client, api_key, overwrite, &mut stats).await?
|
||||||
backfill_lastfm_release_tags(ctx, log, &client, api_key, overwrite, &mut stats).await?;
|
== LastfmTagPassResult::RateLimited
|
||||||
backfill_lastfm_track_tags(ctx, log, &client, api_key, overwrite, &mut stats).await?;
|
{
|
||||||
|
return Ok(stats);
|
||||||
|
}
|
||||||
|
if backfill_lastfm_release_tags(ctx, log, &client, api_key, overwrite, &mut stats).await?
|
||||||
|
== LastfmTagPassResult::RateLimited
|
||||||
|
{
|
||||||
|
return Ok(stats);
|
||||||
|
}
|
||||||
|
if backfill_lastfm_track_tags(ctx, log, &client, api_key, overwrite, &mut stats).await?
|
||||||
|
== LastfmTagPassResult::RateLimited
|
||||||
|
{
|
||||||
|
return Ok(stats);
|
||||||
|
}
|
||||||
Ok(stats)
|
Ok(stats)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -358,7 +376,7 @@ async fn backfill_lastfm_artist_tags(
|
|||||||
api_key: &str,
|
api_key: &str,
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
stats: &mut LastfmTagStats,
|
stats: &mut LastfmTagStats,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<LastfmTagPassResult> {
|
||||||
let rows = sqlx::query_as::<_, LastfmArtistTagRow>(
|
let rows = sqlx::query_as::<_, LastfmArtistTagRow>(
|
||||||
r#"SELECT DISTINCT a.id, a.name::text AS name
|
r#"SELECT DISTINCT a.id, a.name::text AS name
|
||||||
FROM furumusic__artist a
|
FROM furumusic__artist a
|
||||||
@@ -376,7 +394,17 @@ async fn backfill_lastfm_artist_tags(
|
|||||||
));
|
));
|
||||||
let total = rows.len();
|
let total = rows.len();
|
||||||
for (index, row) in rows.into_iter().enumerate() {
|
for (index, row) in rows.into_iter().enumerate() {
|
||||||
if should_skip_lastfm_entity(&ctx.pool, "artist", row.id, overwrite).await? {
|
if should_log_lastfm_item(index + 1, total, 25) {
|
||||||
|
log.info(&format!(
|
||||||
|
"Last.fm artist tags {}/{}: artist {} \"{}\"",
|
||||||
|
index + 1,
|
||||||
|
total,
|
||||||
|
row.id,
|
||||||
|
row.name
|
||||||
|
));
|
||||||
|
}
|
||||||
|
match should_skip_lastfm_entity(&ctx.pool, "artist", row.id, overwrite).await {
|
||||||
|
Ok(true) => {
|
||||||
stats.skipped_existing += 1;
|
stats.skipped_existing += 1;
|
||||||
if should_log_lastfm_progress(index + 1, total, 25) {
|
if should_log_lastfm_progress(index + 1, total, 25) {
|
||||||
log.info(&format!(
|
log.info(&format!(
|
||||||
@@ -387,20 +415,45 @@ async fn backfill_lastfm_artist_tags(
|
|||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Ok(false) => {}
|
||||||
|
Err(err) => {
|
||||||
|
stats.failed += 1;
|
||||||
|
log.warn(&format!(
|
||||||
|
"Last.fm artist tags skip check failed for artist {} \"{}\": {err}",
|
||||||
|
row.id, row.name
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
stats.considered += 1;
|
stats.considered += 1;
|
||||||
match fetch_lastfm_artist_tags(client, api_key, &row.name).await {
|
match fetch_lastfm_artist_tags(client, api_key, &row.name).await {
|
||||||
Ok(tags) if !tags.is_empty() => {
|
Ok(tags) if !tags.is_empty() => {
|
||||||
let saved =
|
match replace_entity_tags(&ctx.pool, "artist", row.id, &tags, "lastfm", false)
|
||||||
replace_entity_tags(&ctx.pool, "artist", row.id, &tags, "lastfm", false)
|
.await
|
||||||
.await?;
|
{
|
||||||
|
Ok(saved) => {
|
||||||
stats.tags_saved += saved;
|
stats.tags_saved += saved;
|
||||||
stats.updated_entities += 1;
|
stats.updated_entities += 1;
|
||||||
}
|
}
|
||||||
|
Err(err) => {
|
||||||
|
stats.failed += 1;
|
||||||
|
log.warn(&format!(
|
||||||
|
"Last.fm artist tags save failed for artist {} \"{}\": {err}",
|
||||||
|
row.id, row.name
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
stats.not_found += 1;
|
stats.not_found += 1;
|
||||||
}
|
}
|
||||||
Err(err) if err.to_string().contains("Last.fm rate limit exceeded") => {
|
Err(err) if is_lastfm_rate_limit_error(&err) => {
|
||||||
return Err(err);
|
stats.failed += 1;
|
||||||
|
log.warn(&format!(
|
||||||
|
"Last.fm rate limit reached while fetching artist tags for artist {} \"{}\"; stopping Last.fm tag backfill for this run",
|
||||||
|
row.id, row.name
|
||||||
|
));
|
||||||
|
return Ok(LastfmTagPassResult::RateLimited);
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
stats.failed += 1;
|
stats.failed += 1;
|
||||||
@@ -419,7 +472,7 @@ async fn backfill_lastfm_artist_tags(
|
|||||||
}
|
}
|
||||||
tokio::time::sleep(LASTFM_TAG_REQUEST_DELAY).await;
|
tokio::time::sleep(LASTFM_TAG_REQUEST_DELAY).await;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(LastfmTagPassResult::Completed)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn backfill_lastfm_release_tags(
|
async fn backfill_lastfm_release_tags(
|
||||||
@@ -429,7 +482,7 @@ async fn backfill_lastfm_release_tags(
|
|||||||
api_key: &str,
|
api_key: &str,
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
stats: &mut LastfmTagStats,
|
stats: &mut LastfmTagStats,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<LastfmTagPassResult> {
|
||||||
let rows = sqlx::query_as::<_, LastfmReleaseTagRow>(
|
let rows = sqlx::query_as::<_, LastfmReleaseTagRow>(
|
||||||
r#"SELECT r.id,
|
r#"SELECT r.id,
|
||||||
r.title::text AS title,
|
r.title::text AS title,
|
||||||
@@ -454,7 +507,17 @@ async fn backfill_lastfm_release_tags(
|
|||||||
));
|
));
|
||||||
let total = rows.len();
|
let total = rows.len();
|
||||||
for (index, row) in rows.into_iter().enumerate() {
|
for (index, row) in rows.into_iter().enumerate() {
|
||||||
if should_skip_lastfm_entity(&ctx.pool, "release", row.id, overwrite).await? {
|
if should_log_lastfm_item(index + 1, total, 25) {
|
||||||
|
log.info(&format!(
|
||||||
|
"Last.fm release tags {}/{}: release {} \"{}\"",
|
||||||
|
index + 1,
|
||||||
|
total,
|
||||||
|
row.id,
|
||||||
|
row.title
|
||||||
|
));
|
||||||
|
}
|
||||||
|
match should_skip_lastfm_entity(&ctx.pool, "release", row.id, overwrite).await {
|
||||||
|
Ok(true) => {
|
||||||
stats.skipped_existing += 1;
|
stats.skipped_existing += 1;
|
||||||
if should_log_lastfm_progress(index + 1, total, 25) {
|
if should_log_lastfm_progress(index + 1, total, 25) {
|
||||||
log.info(&format!(
|
log.info(&format!(
|
||||||
@@ -465,6 +528,16 @@ async fn backfill_lastfm_release_tags(
|
|||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Ok(false) => {}
|
||||||
|
Err(err) => {
|
||||||
|
stats.failed += 1;
|
||||||
|
log.warn(&format!(
|
||||||
|
"Last.fm release tags skip check failed for release {} \"{}\": {err}",
|
||||||
|
row.id, row.title
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
let Some(artist) = row.artist_name.as_deref().filter(|value| !value.trim().is_empty())
|
let Some(artist) = row.artist_name.as_deref().filter(|value| !value.trim().is_empty())
|
||||||
else {
|
else {
|
||||||
stats.not_found += 1;
|
stats.not_found += 1;
|
||||||
@@ -480,17 +553,32 @@ async fn backfill_lastfm_release_tags(
|
|||||||
stats.considered += 1;
|
stats.considered += 1;
|
||||||
match fetch_lastfm_album_tags(client, api_key, artist, &row.title).await {
|
match fetch_lastfm_album_tags(client, api_key, artist, &row.title).await {
|
||||||
Ok(tags) if !tags.is_empty() => {
|
Ok(tags) if !tags.is_empty() => {
|
||||||
let saved =
|
match replace_entity_tags(&ctx.pool, "release", row.id, &tags, "lastfm", false)
|
||||||
replace_entity_tags(&ctx.pool, "release", row.id, &tags, "lastfm", false)
|
.await
|
||||||
.await?;
|
{
|
||||||
|
Ok(saved) => {
|
||||||
stats.tags_saved += saved;
|
stats.tags_saved += saved;
|
||||||
stats.updated_entities += 1;
|
stats.updated_entities += 1;
|
||||||
}
|
}
|
||||||
|
Err(err) => {
|
||||||
|
stats.failed += 1;
|
||||||
|
log.warn(&format!(
|
||||||
|
"Last.fm release tags save failed for release {} \"{}\" / \"{}\": {err}",
|
||||||
|
row.id, artist, row.title
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
stats.not_found += 1;
|
stats.not_found += 1;
|
||||||
}
|
}
|
||||||
Err(err) if err.to_string().contains("Last.fm rate limit exceeded") => {
|
Err(err) if is_lastfm_rate_limit_error(&err) => {
|
||||||
return Err(err);
|
stats.failed += 1;
|
||||||
|
log.warn(&format!(
|
||||||
|
"Last.fm rate limit reached while fetching release tags for release {} \"{}\" / \"{}\"; stopping Last.fm tag backfill for this run",
|
||||||
|
row.id, artist, row.title
|
||||||
|
));
|
||||||
|
return Ok(LastfmTagPassResult::RateLimited);
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
stats.failed += 1;
|
stats.failed += 1;
|
||||||
@@ -509,7 +597,7 @@ async fn backfill_lastfm_release_tags(
|
|||||||
}
|
}
|
||||||
tokio::time::sleep(LASTFM_TAG_REQUEST_DELAY).await;
|
tokio::time::sleep(LASTFM_TAG_REQUEST_DELAY).await;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(LastfmTagPassResult::Completed)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn backfill_lastfm_track_tags(
|
async fn backfill_lastfm_track_tags(
|
||||||
@@ -519,7 +607,7 @@ async fn backfill_lastfm_track_tags(
|
|||||||
api_key: &str,
|
api_key: &str,
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
stats: &mut LastfmTagStats,
|
stats: &mut LastfmTagStats,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<LastfmTagPassResult> {
|
||||||
let rows = sqlx::query_as::<_, LastfmTrackTagRow>(
|
let rows = sqlx::query_as::<_, LastfmTrackTagRow>(
|
||||||
r#"SELECT t.id,
|
r#"SELECT t.id,
|
||||||
t.title::text AS title,
|
t.title::text AS title,
|
||||||
@@ -544,7 +632,17 @@ async fn backfill_lastfm_track_tags(
|
|||||||
));
|
));
|
||||||
let total = rows.len();
|
let total = rows.len();
|
||||||
for (index, row) in rows.into_iter().enumerate() {
|
for (index, row) in rows.into_iter().enumerate() {
|
||||||
if should_skip_lastfm_entity(&ctx.pool, "track", row.id, overwrite).await? {
|
if should_log_lastfm_item(index + 1, total, 50) {
|
||||||
|
log.info(&format!(
|
||||||
|
"Last.fm track tags {}/{}: track {} \"{}\"",
|
||||||
|
index + 1,
|
||||||
|
total,
|
||||||
|
row.id,
|
||||||
|
row.title
|
||||||
|
));
|
||||||
|
}
|
||||||
|
match should_skip_lastfm_entity(&ctx.pool, "track", row.id, overwrite).await {
|
||||||
|
Ok(true) => {
|
||||||
stats.skipped_existing += 1;
|
stats.skipped_existing += 1;
|
||||||
if should_log_lastfm_progress(index + 1, total, 50) {
|
if should_log_lastfm_progress(index + 1, total, 50) {
|
||||||
log.info(&format!(
|
log.info(&format!(
|
||||||
@@ -555,6 +653,16 @@ async fn backfill_lastfm_track_tags(
|
|||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Ok(false) => {}
|
||||||
|
Err(err) => {
|
||||||
|
stats.failed += 1;
|
||||||
|
log.warn(&format!(
|
||||||
|
"Last.fm track tags skip check failed for track {} \"{}\": {err}",
|
||||||
|
row.id, row.title
|
||||||
|
));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
let Some(artist) = row.artist_name.as_deref().filter(|value| !value.trim().is_empty())
|
let Some(artist) = row.artist_name.as_deref().filter(|value| !value.trim().is_empty())
|
||||||
else {
|
else {
|
||||||
stats.not_found += 1;
|
stats.not_found += 1;
|
||||||
@@ -570,16 +678,31 @@ async fn backfill_lastfm_track_tags(
|
|||||||
stats.considered += 1;
|
stats.considered += 1;
|
||||||
match fetch_lastfm_track_tags(client, api_key, artist, &row.title).await {
|
match fetch_lastfm_track_tags(client, api_key, artist, &row.title).await {
|
||||||
Ok(tags) if !tags.is_empty() => {
|
Ok(tags) if !tags.is_empty() => {
|
||||||
let saved =
|
match replace_entity_tags(&ctx.pool, "track", row.id, &tags, "lastfm", true).await
|
||||||
replace_entity_tags(&ctx.pool, "track", row.id, &tags, "lastfm", true).await?;
|
{
|
||||||
|
Ok(saved) => {
|
||||||
stats.tags_saved += saved;
|
stats.tags_saved += saved;
|
||||||
stats.updated_entities += 1;
|
stats.updated_entities += 1;
|
||||||
}
|
}
|
||||||
|
Err(err) => {
|
||||||
|
stats.failed += 1;
|
||||||
|
log.warn(&format!(
|
||||||
|
"Last.fm track tags save failed for track {} \"{}\" / \"{}\": {err}",
|
||||||
|
row.id, artist, row.title
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
stats.not_found += 1;
|
stats.not_found += 1;
|
||||||
}
|
}
|
||||||
Err(err) if err.to_string().contains("Last.fm rate limit exceeded") => {
|
Err(err) if is_lastfm_rate_limit_error(&err) => {
|
||||||
return Err(err);
|
stats.failed += 1;
|
||||||
|
log.warn(&format!(
|
||||||
|
"Last.fm rate limit reached while fetching track tags for track {} \"{}\" / \"{}\"; stopping Last.fm tag backfill for this run",
|
||||||
|
row.id, artist, row.title
|
||||||
|
));
|
||||||
|
return Ok(LastfmTagPassResult::RateLimited);
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
stats.failed += 1;
|
stats.failed += 1;
|
||||||
@@ -598,13 +721,21 @@ async fn backfill_lastfm_track_tags(
|
|||||||
}
|
}
|
||||||
tokio::time::sleep(LASTFM_TAG_REQUEST_DELAY).await;
|
tokio::time::sleep(LASTFM_TAG_REQUEST_DELAY).await;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(LastfmTagPassResult::Completed)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decides whether a progress line should be emitted after `done` of `total`
/// items have been handled.
///
/// Returns `true` when `total` is non-zero and either the last item has been
/// reached (`done == total`) or `done` is a multiple of `every`.
///
/// An `every` of zero disables the periodic trigger instead of panicking on a
/// division by zero; the final item is still reported.
fn should_log_lastfm_progress(done: usize, total: usize, every: usize) -> bool {
    // `checked_rem` yields `None` for a zero divisor, so the comparison is
    // simply false in that case.
    total > 0 && (done == total || done.checked_rem(every) == Some(0))
}
|
||||||
|
|
||||||
|
/// Decides whether the per-item log line should be emitted for item number
/// `done` (1-based) out of `total`.
///
/// Returns `true` when `total` is non-zero and `done` is the first item, the
/// last item, or a multiple of `every`.
///
/// An `every` of zero disables the periodic trigger instead of panicking on a
/// division by zero; the first and last items are still reported.
fn should_log_lastfm_item(done: usize, total: usize, every: usize) -> bool {
    if total == 0 {
        return false;
    }
    // `checked_rem` yields `None` for a zero divisor, so the comparison is
    // simply false in that case.
    done == 1 || done == total || done.checked_rem(every) == Some(0)
}
|
||||||
|
|
||||||
|
fn is_lastfm_rate_limit_error(err: &anyhow::Error) -> bool {
|
||||||
|
err.to_string().contains("Last.fm rate limit exceeded")
|
||||||
|
}
|
||||||
|
|
||||||
async fn should_skip_lastfm_entity(
|
async fn should_skip_lastfm_entity(
|
||||||
pool: &sqlx::PgPool,
|
pool: &sqlx::PgPool,
|
||||||
entity_kind: &str,
|
entity_kind: &str,
|
||||||
|
|||||||
Reference in New Issue
Block a user