Compare commits
6 Commits
e42566f44e
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 761306eb9d | |||
| e85ed32b7b | |||
| e34440498c | |||
| f873542d02 | |||
| 3f2013e9d5 | |||
| 7ede23ff94 |
Generated
+54
@@ -2,6 +2,12 @@
|
|||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 4
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "adler2"
|
||||||
|
version = "2.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aho-corasick"
|
name = "aho-corasick"
|
||||||
version = "1.1.4"
|
version = "1.1.4"
|
||||||
@@ -572,6 +578,15 @@ version = "2.4.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
|
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crc32fast"
|
||||||
|
version = "1.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-channel"
|
name = "crossbeam-channel"
|
||||||
version = "0.5.15"
|
version = "0.5.15"
|
||||||
@@ -969,6 +984,16 @@ version = "0.5.7"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
|
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "flate2"
|
||||||
|
version = "1.1.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
|
||||||
|
dependencies = [
|
||||||
|
"crc32fast",
|
||||||
|
"miniz_oxide",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flume"
|
name = "flume"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
@@ -1017,6 +1042,7 @@ dependencies = [
|
|||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"encoding_rs",
|
"encoding_rs",
|
||||||
|
"id3",
|
||||||
"reqwest 0.12.28",
|
"reqwest 0.12.28",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@@ -1165,6 +1191,7 @@ dependencies = [
|
|||||||
"tokio",
|
"tokio",
|
||||||
"tokio-util",
|
"tokio-util",
|
||||||
"tower 0.4.13",
|
"tower 0.4.13",
|
||||||
|
"tower-http",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
"urlencoding",
|
"urlencoding",
|
||||||
@@ -1748,6 +1775,17 @@ version = "2.3.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
|
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "id3"
|
||||||
|
version = "1.16.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "965c5e6a62a241f2f673df956ea5f52c27780bc1031855890a551ed9b869e2d1"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.11.0",
|
||||||
|
"byteorder",
|
||||||
|
"flate2",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ident_case"
|
name = "ident_case"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
@@ -2021,6 +2059,16 @@ version = "0.2.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "miniz_oxide"
|
||||||
|
version = "0.8.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
|
||||||
|
dependencies = [
|
||||||
|
"adler2",
|
||||||
|
"simd-adler32",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mio"
|
name = "mio"
|
||||||
version = "1.1.1"
|
version = "1.1.1"
|
||||||
@@ -3412,6 +3460,12 @@ dependencies = [
|
|||||||
"rand_core 0.6.4",
|
"rand_core 0.6.4",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "simd-adler32"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "simple_asn1"
|
name = "simple_asn1"
|
||||||
version = "0.6.4"
|
version = "0.6.4"
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ serde = { version = "1", features = ["derive"] }
|
|||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
sqlx = { version = "0.8", features = ["runtime-tokio-rustls", "postgres", "chrono", "uuid", "migrate"] }
|
sqlx = { version = "0.8", features = ["runtime-tokio-rustls", "postgres", "chrono", "uuid", "migrate"] }
|
||||||
symphonia = { version = "0.5", default-features = false, features = ["mp3", "aac", "flac", "vorbis", "wav", "alac", "adpcm", "pcm", "mpa", "isomp4", "ogg", "aiff", "mkv"] }
|
symphonia = { version = "0.5", default-features = false, features = ["mp3", "aac", "flac", "vorbis", "wav", "alac", "adpcm", "pcm", "mpa", "isomp4", "ogg", "aiff", "mkv"] }
|
||||||
|
id3 = "1"
|
||||||
thiserror = "2.0"
|
thiserror = "2.0"
|
||||||
tokio = { version = "1.50", features = ["full"] }
|
tokio = { version = "1.50", features = ["full"] }
|
||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
|
|||||||
@@ -19,9 +19,25 @@ pub struct RawMetadata {
|
|||||||
pub duration_secs: Option<f64>,
|
pub duration_secs: Option<f64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract metadata from an audio file using Symphonia.
|
/// Extract metadata from an audio file.
|
||||||
|
/// For MP3, falls back to the `id3` crate when Symphonia cannot probe the file
|
||||||
|
/// (e.g., ID3 tag with large embedded cover art exceeds Symphonia's 1 MB probe limit).
|
||||||
/// Must be called from a blocking context (spawn_blocking).
|
/// Must be called from a blocking context (spawn_blocking).
|
||||||
pub fn extract(path: &Path) -> anyhow::Result<RawMetadata> {
|
pub fn extract(path: &Path) -> anyhow::Result<RawMetadata> {
|
||||||
|
match extract_via_symphonia(path) {
|
||||||
|
Ok(meta) => return Ok(meta),
|
||||||
|
Err(e) => {
|
||||||
|
let is_mp3 = path.extension().and_then(|e| e.to_str()).map(|e| e.eq_ignore_ascii_case("mp3")).unwrap_or(false);
|
||||||
|
if is_mp3 {
|
||||||
|
tracing::debug!(error = %e, "Symphonia failed on MP3, falling back to id3 crate");
|
||||||
|
return extract_mp3_via_id3(path);
|
||||||
|
}
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_via_symphonia(path: &Path) -> anyhow::Result<RawMetadata> {
|
||||||
let file = std::fs::File::open(path)?;
|
let file = std::fs::File::open(path)?;
|
||||||
let mss = MediaSourceStream::new(Box::new(file), Default::default());
|
let mss = MediaSourceStream::new(Box::new(file), Default::default());
|
||||||
|
|
||||||
@@ -66,6 +82,25 @@ pub fn extract(path: &Path) -> anyhow::Result<RawMetadata> {
|
|||||||
Ok(meta)
|
Ok(meta)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Read MP3 tags via the `id3` crate. Duration is not available this way.
|
||||||
|
fn extract_mp3_via_id3(path: &Path) -> anyhow::Result<RawMetadata> {
|
||||||
|
use id3::TagLike;
|
||||||
|
|
||||||
|
let tag = id3::Tag::read_from_path(path)
|
||||||
|
.map_err(|e| anyhow::anyhow!("id3 read failed: {}", e))?;
|
||||||
|
|
||||||
|
let mut meta = RawMetadata::default();
|
||||||
|
meta.title = tag.title().map(|s| fix_encoding(s.to_owned()));
|
||||||
|
meta.artist = tag.artist().map(|s| fix_encoding(s.to_owned()));
|
||||||
|
meta.album = tag.album().map(|s| fix_encoding(s.to_owned()));
|
||||||
|
meta.year = tag.year().and_then(|y| u32::try_from(y).ok());
|
||||||
|
meta.track_number = tag.track();
|
||||||
|
meta.genre = tag.genre().map(|s: &str| fix_encoding(s.to_owned()));
|
||||||
|
// duration_secs remains None — acceptable for large-cover files
|
||||||
|
|
||||||
|
Ok(meta)
|
||||||
|
}
|
||||||
|
|
||||||
fn extract_tags(tags: &[symphonia::core::meta::Tag], meta: &mut RawMetadata) {
|
fn extract_tags(tags: &[symphonia::core::meta::Tag], meta: &mut RawMetadata) {
|
||||||
for tag in tags {
|
for tag in tags {
|
||||||
let value = fix_encoding(tag.value.to_string());
|
let value = fix_encoding(tag.value.to_string());
|
||||||
|
|||||||
@@ -113,32 +113,38 @@ fn build_user_message(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct OllamaRequest {
|
struct ChatRequest {
|
||||||
model: String,
|
model: String,
|
||||||
messages: Vec<OllamaMessage>,
|
messages: Vec<ChatMessage>,
|
||||||
format: String,
|
response_format: ChatResponseFormat,
|
||||||
stream: bool,
|
stream: bool,
|
||||||
options: OllamaOptions,
|
temperature: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct OllamaMessage {
|
struct ChatMessage {
|
||||||
role: String,
|
role: String,
|
||||||
content: String,
|
content: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct OllamaOptions {
|
struct ChatResponseFormat {
|
||||||
temperature: f64,
|
#[serde(rename = "type")]
|
||||||
|
kind: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct OllamaResponse {
|
struct ChatResponse {
|
||||||
message: OllamaResponseMessage,
|
choices: Vec<ChatChoice>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct OllamaResponseMessage {
|
struct ChatChoice {
|
||||||
|
message: ChatResponseMessage,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct ChatResponseMessage {
|
||||||
content: String,
|
content: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -153,25 +159,25 @@ pub async fn call_ollama(
|
|||||||
.timeout(std::time::Duration::from_secs(120))
|
.timeout(std::time::Duration::from_secs(120))
|
||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
let request = OllamaRequest {
|
let request = ChatRequest {
|
||||||
model: model.to_owned(),
|
model: model.to_owned(),
|
||||||
messages: vec![
|
messages: vec![
|
||||||
OllamaMessage {
|
ChatMessage {
|
||||||
role: "system".to_owned(),
|
role: "system".to_owned(),
|
||||||
content: system_prompt.to_owned(),
|
content: system_prompt.to_owned(),
|
||||||
},
|
},
|
||||||
OllamaMessage {
|
ChatMessage {
|
||||||
role: "user".to_owned(),
|
role: "user".to_owned(),
|
||||||
content: user_message.to_owned(),
|
content: user_message.to_owned(),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
format: "json".to_owned(),
|
response_format: ChatResponseFormat { kind: "json_object".to_owned() },
|
||||||
stream: false,
|
stream: false,
|
||||||
options: OllamaOptions { temperature: 0.1 },
|
temperature: 0.1,
|
||||||
};
|
};
|
||||||
|
|
||||||
let url = format!("{}/api/chat", base_url.trim_end_matches('/'));
|
let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
|
||||||
tracing::info!(%url, model, prompt_len = user_message.len(), "Calling Ollama API...");
|
tracing::info!(%url, model, prompt_len = user_message.len(), "Calling LLM API...");
|
||||||
|
|
||||||
let start = std::time::Instant::now();
|
let start = std::time::Instant::now();
|
||||||
let mut req = client.post(&url).json(&request);
|
let mut req = client.post(&url).json(&request);
|
||||||
@@ -184,18 +190,25 @@ pub async fn call_ollama(
|
|||||||
if !resp.status().is_success() {
|
if !resp.status().is_success() {
|
||||||
let status = resp.status();
|
let status = resp.status();
|
||||||
let body = resp.text().await.unwrap_or_default();
|
let body = resp.text().await.unwrap_or_default();
|
||||||
tracing::error!(%status, body = &body[..body.len().min(500)], "Ollama API error");
|
tracing::error!(%status, body = &body[..body.len().min(500)], "LLM API error");
|
||||||
anyhow::bail!("Ollama returned {}: {}", status, body);
|
anyhow::bail!("LLM returned {}: {}", status, body);
|
||||||
}
|
}
|
||||||
|
|
||||||
let ollama_resp: OllamaResponse = resp.json().await?;
|
let chat_resp: ChatResponse = resp.json().await?;
|
||||||
|
let content = chat_resp
|
||||||
|
.choices
|
||||||
|
.into_iter()
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| anyhow::anyhow!("LLM returned empty choices"))?
|
||||||
|
.message
|
||||||
|
.content;
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
elapsed_ms = elapsed.as_millis() as u64,
|
elapsed_ms = elapsed.as_millis() as u64,
|
||||||
response_len = ollama_resp.message.content.len(),
|
response_len = content.len(),
|
||||||
"Ollama response received"
|
"LLM response received"
|
||||||
);
|
);
|
||||||
tracing::debug!(raw_response = %ollama_resp.message.content, "LLM raw output");
|
tracing::debug!(raw_response = %content, "LLM raw output");
|
||||||
Ok(ollama_resp.message.content)
|
Ok(content)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse the LLM JSON response into NormalizedFields.
|
/// Parse the LLM JSON response into NormalizedFields.
|
||||||
|
|||||||
Reference in New Issue
Block a user