5 Commits

Author SHA1 Message Date
ab 48c473de56 fix(agent): increase max_tokens for merge requests to avoid truncated responses
Publish Metadata Agent Image (dev) / build-and-push-image (push) Successful in 3m43s
Publish Web Player Image (dev) / build-and-push-image (push) Successful in 4m20s
normalize: 512 tokens (sufficient for single track metadata)
merge: 4096 tokens (needed for artists with many albums)

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 22:52:23 +01:00
ab 1e75644abb feat(agent): switch LLM client from Ollama to OpenAI-compatible API (LM Studio support)
Publish Metadata Agent Image (dev) / build-and-push-image (push) Successful in 4m7s
Publish Web Player Image (dev) / build-and-push-image (push) Successful in 3m57s
- Replace /api/chat with /v1/chat/completions endpoint
- Use json_schema response_format (LM Studio does not support json_object)
- Make schema parameter optional in call_ollama to support different schemas per use case
- Add dedicated normalize schema (normalized_metadata) with release_kind field
  instead of release_type to avoid model repetition loops
- Add dedicated merge schema (artist_merge) so model no longer confuses
  normalize and merge response structures
- Add retry with frequency_penalty=1.5 on parse failure to suppress repetition
- Add id3 crate as fallback metadata reader for MP3 files with large embedded
  cover art that exceed Symphonia probe limit of 1MB

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 22:34:39 +01:00
ab 2d7ac3d8ce Fixed openai api endpoint
Publish Metadata Agent Image (dev) / build-and-push-image (push) Successful in 4m0s
Publish Web Player Image (dev) / build-and-push-image (push) Successful in 4m23s
2026-04-07 19:52:03 +01:00
ab 70a947a8c1 Fixed openai api endpoint
Publish Metadata Agent Image (dev) / build-and-push-image (push) Successful in 3m38s
Publish Web Player Image (dev) / build-and-push-image (push) Successful in 3m49s
2026-04-07 19:32:17 +01:00
XakPlant aea4aef4b2 Merge pull request 'feature/node-app' (#7) from feature/node-app into DEV
Publish Metadata Agent Image (dev) / build-and-push-image (push) Successful in 1m8s
Publish Web Player Image (dev) / build-and-push-image (push) Successful in 1m7s
Reviewed-on: #7
2026-03-23 14:00:08 +00:00
5 changed files with 219 additions and 26 deletions
Generated
+54
View File
@@ -2,6 +2,12 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aho-corasick"
version = "1.1.4"
@@ -572,6 +578,15 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.15"
@@ -969,6 +984,16 @@ version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "flate2"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "flume"
version = "0.11.1"
@@ -1017,6 +1042,7 @@ dependencies = [
"chrono",
"clap",
"encoding_rs",
"id3",
"reqwest 0.12.28",
"serde",
"serde_json",
@@ -1165,6 +1191,7 @@ dependencies = [
"tokio",
"tokio-util",
"tower 0.4.13",
"tower-http",
"tracing",
"tracing-subscriber",
"urlencoding",
@@ -1748,6 +1775,17 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
[[package]]
name = "id3"
version = "1.16.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "965c5e6a62a241f2f673df956ea5f52c27780bc1031855890a551ed9b869e2d1"
dependencies = [
"bitflags 2.11.0",
"byteorder",
"flate2",
]
[[package]]
name = "ident_case"
version = "1.0.1"
@@ -2021,6 +2059,16 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "mio"
version = "1.1.1"
@@ -3412,6 +3460,12 @@ dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "simd-adler32"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
[[package]]
name = "simple_asn1"
version = "0.6.4"
+1
View File
@@ -14,6 +14,7 @@ serde = { version = "1", features = ["derive"] }
serde_json = "1"
sqlx = { version = "0.8", features = ["runtime-tokio-rustls", "postgres", "chrono", "uuid", "migrate"] }
symphonia = { version = "0.5", default-features = false, features = ["mp3", "aac", "flac", "vorbis", "wav", "alac", "adpcm", "pcm", "mpa", "isomp4", "ogg", "aiff", "mkv"] }
id3 = "1"
thiserror = "2.0"
tokio = { version = "1.50", features = ["full"] }
tracing = "0.1"
+36 -1
View File
@@ -19,9 +19,25 @@ pub struct RawMetadata {
pub duration_secs: Option<f64>,
}
/// Extract metadata from an audio file using Symphonia.
/// Extract metadata from an audio file.
/// For MP3, falls back to the `id3` crate when Symphonia cannot probe the file
/// (e.g., ID3 tag with large embedded cover art exceeds Symphonia's 1 MB probe limit).
/// Must be called from a blocking context (spawn_blocking).
pub fn extract(path: &Path) -> anyhow::Result<RawMetadata> {
match extract_via_symphonia(path) {
Ok(meta) => return Ok(meta),
Err(e) => {
let is_mp3 = path.extension().and_then(|e| e.to_str()).map(|e| e.eq_ignore_ascii_case("mp3")).unwrap_or(false);
if is_mp3 {
tracing::debug!(error = %e, "Symphonia failed on MP3, falling back to id3 crate");
return extract_mp3_via_id3(path);
}
return Err(e);
}
}
}
fn extract_via_symphonia(path: &Path) -> anyhow::Result<RawMetadata> {
let file = std::fs::File::open(path)?;
let mss = MediaSourceStream::new(Box::new(file), Default::default());
@@ -66,6 +82,25 @@ pub fn extract(path: &Path) -> anyhow::Result<RawMetadata> {
Ok(meta)
}
/// Read MP3 tags via the `id3` crate. Duration is not available this way.
fn extract_mp3_via_id3(path: &Path) -> anyhow::Result<RawMetadata> {
use id3::TagLike;
let tag = id3::Tag::read_from_path(path)
.map_err(|e| anyhow::anyhow!("id3 read failed: {}", e))?;
let mut meta = RawMetadata::default();
meta.title = tag.title().map(|s| fix_encoding(s.to_owned()));
meta.artist = tag.artist().map(|s| fix_encoding(s.to_owned()));
meta.album = tag.album().map(|s| fix_encoding(s.to_owned()));
meta.year = tag.year().and_then(|y| u32::try_from(y).ok());
meta.track_number = tag.track();
meta.genre = tag.genre().map(|s: &str| fix_encoding(s.to_owned()));
// duration_secs remains None — acceptable for large-cover files
Ok(meta)
}
fn extract_tags(tags: &[symphonia::core::meta::Tag], meta: &mut RawMetadata) {
for tag in tags {
let value = fix_encoding(tag.value.to_string());
+101 -25
View File
@@ -25,16 +25,37 @@ pub async fn normalize(
) -> anyhow::Result<NormalizedFields> {
let user_message = build_user_message(raw, hints, similar_artists, similar_albums, folder_ctx);
let schema = normalize_schema();
let response = call_ollama(
&state.config.ollama_url,
&state.config.ollama_model,
&state.system_prompt,
&user_message,
state.config.ollama_auth.as_deref(),
0.5,
512,
Some(("normalized_metadata", schema.clone())),
)
.await?;
parse_response(&response)
match parse_response(&response) {
Ok(fields) => Ok(fields),
Err(e) => {
tracing::warn!(error = %e, "LLM parse failed, retrying with higher frequency_penalty");
let response2 = call_ollama(
&state.config.ollama_url,
&state.config.ollama_model,
&state.system_prompt,
&user_message,
state.config.ollama_auth.as_deref(),
1.5,
512,
Some(("normalized_metadata", schema)),
)
.await?;
parse_response(&response2)
}
}
}
fn build_user_message(
@@ -113,32 +134,49 @@ fn build_user_message(
}
#[derive(Serialize)]
struct OllamaRequest {
struct ChatRequest {
model: String,
messages: Vec<OllamaMessage>,
format: String,
messages: Vec<ChatMessage>,
#[serde(skip_serializing_if = "Option::is_none")]
response_format: Option<ChatResponseFormat>,
stream: bool,
options: OllamaOptions,
temperature: f64,
max_tokens: u32,
frequency_penalty: f64,
}
#[derive(Serialize)]
struct OllamaMessage {
struct ChatMessage {
role: String,
content: String,
}
#[derive(Serialize)]
struct OllamaOptions {
temperature: f64,
struct ChatResponseFormat {
#[serde(rename = "type")]
kind: String,
json_schema: JsonSchemaWrapper,
}
/// Serialized payload of the `json_schema` field of `ChatResponseFormat`.
#[derive(Serialize)]
struct JsonSchemaWrapper {
/// Name under which the schema is sent to the API.
name: String,
/// Serialized as the `strict` flag; `call_ollama` always sets it to `true`.
strict: bool,
/// The JSON Schema document itself, sent as-is.
schema: serde_json::Value,
}
#[derive(Deserialize)]
struct OllamaResponse {
message: OllamaResponseMessage,
struct ChatResponse {
choices: Vec<ChatChoice>,
}
#[derive(Deserialize)]
struct OllamaResponseMessage {
struct ChatChoice {
message: ChatResponseMessage,
}
#[derive(Deserialize)]
struct ChatResponseMessage {
content: String,
}
@@ -148,30 +186,40 @@ pub async fn call_ollama(
system_prompt: &str,
user_message: &str,
auth: Option<&str>,
frequency_penalty: f64,
max_tokens: u32,
schema: Option<(&str, serde_json::Value)>,
) -> anyhow::Result<String> {
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(120))
.build()?;
let request = OllamaRequest {
let response_format = schema.map(|(name, schema)| ChatResponseFormat {
kind: "json_schema".to_owned(),
json_schema: JsonSchemaWrapper { name: name.to_owned(), strict: true, schema },
});
let request = ChatRequest {
model: model.to_owned(),
messages: vec![
OllamaMessage {
ChatMessage {
role: "system".to_owned(),
content: system_prompt.to_owned(),
},
OllamaMessage {
ChatMessage {
role: "user".to_owned(),
content: user_message.to_owned(),
},
],
format: "json".to_owned(),
response_format,
stream: false,
options: OllamaOptions { temperature: 0.1 },
temperature: 0.1,
max_tokens,
frequency_penalty,
};
let url = format!("{}/api/chat", base_url.trim_end_matches('/'));
tracing::info!(%url, model, prompt_len = user_message.len(), "Calling Ollama API...");
let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
tracing::info!(%url, model, prompt_len = user_message.len(), "Calling LLM API...");
let start = std::time::Instant::now();
let mut req = client.post(&url).json(&request);
@@ -184,18 +232,45 @@ pub async fn call_ollama(
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::error!(%status, body = &body[..body.len().min(500)], "Ollama API error");
anyhow::bail!("Ollama returned {}: {}", status, body);
tracing::error!(%status, body = &body[..body.len().min(500)], "LLM API error");
anyhow::bail!("LLM returned {}: {}", status, body);
}
let ollama_resp: OllamaResponse = resp.json().await?;
let chat_resp: ChatResponse = resp.json().await?;
let content = chat_resp
.choices
.into_iter()
.next()
.ok_or_else(|| anyhow::anyhow!("LLM returned empty choices"))?
.message
.content;
tracing::info!(
elapsed_ms = elapsed.as_millis() as u64,
response_len = ollama_resp.message.content.len(),
"Ollama response received"
response_len = content.len(),
"LLM response received"
);
tracing::debug!(raw_response = %ollama_resp.message.content, "LLM raw output");
Ok(ollama_resp.message.content)
tracing::debug!(raw_response = %content, "LLM raw output");
Ok(content)
}
/// Build the JSON Schema describing the normalize response object.
///
/// Every property is required and no additional properties are allowed.
/// All scalar properties accept `null`; `featured_artists` is always an array
/// of strings.
fn normalize_schema() -> serde_json::Value {
    // `{ "type": [<ty>, "null"] }` — a value of the given type or null.
    let nullable = |ty: &str| serde_json::json!({ "type": [ty, "null"] });

    serde_json::json!({
        "type": "object",
        "properties": {
            "artist": nullable("string"),
            "album": nullable("string"),
            "title": nullable("string"),
            "year": nullable("integer"),
            "track_number": nullable("integer"),
            "genre": nullable("string"),
            "featured_artists": { "type": "array", "items": { "type": "string" } },
            "release_kind": nullable("string"),
            "confidence": nullable("number"),
            "notes": nullable("string")
        },
        "required": [
            "artist",
            "album",
            "title",
            "year",
            "track_number",
            "genre",
            "featured_artists",
            "release_kind",
            "confidence",
            "notes"
        ],
        "additionalProperties": false
    })
}
/// Parse the LLM JSON response into NormalizedFields.
@@ -222,6 +297,7 @@ fn parse_response(response: &str) -> anyhow::Result<NormalizedFields> {
genre: Option<String>,
#[serde(default)]
featured_artists: Vec<String>,
#[serde(rename = "release_kind")]
release_type: Option<String>,
confidence: Option<f64>,
notes: Option<String>,
+27
View File
@@ -35,12 +35,39 @@ pub async fn propose_merge(state: &Arc<AppState>, merge_id: Uuid) -> anyhow::Res
let user_message = build_merge_message(&artists_data);
let schema = serde_json::json!({
"type": "object",
"properties": {
"canonical_artist_name": { "type": "string" },
"winner_artist_id": { "type": "integer" },
"album_mappings": {
"type": "array",
"items": {
"type": "object",
"properties": {
"source_album_id": { "type": "integer" },
"canonical_name": { "type": "string" },
"merge_into_album_id": { "type": ["integer", "null"] }
},
"required": ["source_album_id", "canonical_name", "merge_into_album_id"],
"additionalProperties": false
}
},
"notes": { "type": "string" }
},
"required": ["canonical_artist_name", "winner_artist_id", "album_mappings", "notes"],
"additionalProperties": false
});
let response = call_ollama(
&state.config.ollama_url,
&state.config.ollama_model,
&state.merge_prompt,
&user_message,
state.config.ollama_auth.as_deref(),
0.5,
4096,
Some(("artist_merge", schema)),
).await?;
let proposal = parse_merge_response(&response)?;