Added AI agent to manage metadata

This commit is contained in:
2026-03-18 02:21:00 +00:00
parent 8a49a5013b
commit d5068aaa33
17 changed files with 3384 additions and 1 deletions

452
Cargo.lock generated
View File

@@ -11,6 +11,12 @@ dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "android_system_properties"
version = "0.1.5"
@@ -76,6 +82,12 @@ version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "arrayref"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
[[package]]
name = "arrayvec"
version = "0.7.6"
@@ -165,6 +177,15 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "atoi"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528"
dependencies = [
"num-traits",
]
[[package]]
name = "atomic-waker"
version = "1.1.2"
@@ -318,6 +339,23 @@ name = "bitflags"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
dependencies = [
"serde_core",
]
[[package]]
name = "blake3"
version = "1.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
"cpufeatures",
]
[[package]]
name = "block-buffer"
@@ -478,6 +516,12 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "constant_time_eq"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
[[package]]
name = "core-foundation"
version = "0.9.4"
@@ -513,6 +557,21 @@ dependencies = [
"libc",
]
[[package]]
name = "crc"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d"
dependencies = [
"crc-catalog",
]
[[package]]
name = "crc-catalog"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
[[package]]
name = "crossbeam-channel"
version = "0.5.15"
@@ -531,6 +590,15 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-queue"
version = "0.3.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
@@ -707,6 +775,12 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "dotenvy"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
[[package]]
name = "dunce"
version = "1.0.5"
@@ -762,6 +836,9 @@ name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
dependencies = [
"serde",
]
[[package]]
name = "elliptic-curve"
@@ -809,6 +886,17 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "etcetera"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943"
dependencies = [
"cfg-if",
"home",
"windows-sys 0.48.0",
]
[[package]]
name = "event-listener"
version = "5.4.1"
@@ -881,6 +969,17 @@ version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "flume"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
dependencies = [
"futures-core",
"futures-sink",
"spin",
]
[[package]]
name = "fnv"
version = "1.0.7"
@@ -908,6 +1007,29 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "furumi-agent"
version = "0.3.4"
dependencies = [
"anyhow",
"axum",
"blake3",
"chrono",
"clap",
"encoding_rs",
"reqwest 0.12.28",
"serde",
"serde_json",
"sqlx",
"symphonia",
"thiserror 2.0.18",
"tokio",
"tower 0.4.13",
"tracing",
"tracing-subscriber",
"uuid",
]
[[package]]
name = "furumi-client-core"
version = "0.3.4"
@@ -1079,6 +1201,17 @@ dependencies = [
"futures-util",
]
[[package]]
name = "futures-intrusive"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f"
dependencies = [
"futures-core",
"lock_api",
"parking_lot",
]
[[package]]
name = "futures-io"
version = "0.3.32"
@@ -1237,6 +1370,8 @@ version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
@@ -1246,6 +1381,15 @@ version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]]
name = "hashlink"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
dependencies = [
"hashbrown 0.15.5",
]
[[package]]
name = "heck"
version = "0.5.0"
@@ -1276,6 +1420,15 @@ dependencies = [
"digest",
]
[[package]]
name = "home"
version = "0.5.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d"
dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "http"
version = "0.2.12"
@@ -1739,6 +1892,16 @@ dependencies = [
"redox_syscall 0.7.3",
]
[[package]]
name = "libsqlite3-sys"
version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149"
dependencies = [
"pkg-config",
"vcpkg",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
@@ -1793,6 +1956,16 @@ version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]]
name = "md-5"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [
"cfg-if",
"digest",
]
[[package]]
name = "memchr"
version = "2.8.0"
@@ -3155,6 +3328,17 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "sha1"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "sha2"
version = "0.10.9"
@@ -3224,6 +3408,9 @@ name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
dependencies = [
"serde",
]
[[package]]
name = "socket2"
@@ -3250,6 +3437,9 @@ name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
dependencies = [
"lock_api",
]
[[package]]
name = "spki"
@@ -3261,12 +3451,221 @@ dependencies = [
"der",
]
[[package]]
name = "sqlx"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc"
dependencies = [
"sqlx-core",
"sqlx-macros",
"sqlx-mysql",
"sqlx-postgres",
"sqlx-sqlite",
]
[[package]]
name = "sqlx-core"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6"
dependencies = [
"base64 0.22.1",
"bytes",
"chrono",
"crc",
"crossbeam-queue",
"either",
"event-listener",
"futures-core",
"futures-intrusive",
"futures-io",
"futures-util",
"hashbrown 0.15.5",
"hashlink",
"indexmap 2.13.0",
"log",
"memchr",
"once_cell",
"percent-encoding",
"rustls 0.23.37",
"serde",
"serde_json",
"sha2",
"smallvec",
"thiserror 2.0.18",
"tokio",
"tokio-stream",
"tracing",
"url",
"uuid",
"webpki-roots 0.26.11",
]
[[package]]
name = "sqlx-macros"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d"
dependencies = [
"proc-macro2",
"quote",
"sqlx-core",
"sqlx-macros-core",
"syn 2.0.117",
]
[[package]]
name = "sqlx-macros-core"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b"
dependencies = [
"dotenvy",
"either",
"heck",
"hex",
"once_cell",
"proc-macro2",
"quote",
"serde",
"serde_json",
"sha2",
"sqlx-core",
"sqlx-mysql",
"sqlx-postgres",
"sqlx-sqlite",
"syn 2.0.117",
"tokio",
"url",
]
[[package]]
name = "sqlx-mysql"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526"
dependencies = [
"atoi",
"base64 0.22.1",
"bitflags 2.11.0",
"byteorder",
"bytes",
"chrono",
"crc",
"digest",
"dotenvy",
"either",
"futures-channel",
"futures-core",
"futures-io",
"futures-util",
"generic-array",
"hex",
"hkdf",
"hmac",
"itoa",
"log",
"md-5",
"memchr",
"once_cell",
"percent-encoding",
"rand 0.8.5",
"rsa",
"serde",
"sha1",
"sha2",
"smallvec",
"sqlx-core",
"stringprep",
"thiserror 2.0.18",
"tracing",
"uuid",
"whoami",
]
[[package]]
name = "sqlx-postgres"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46"
dependencies = [
"atoi",
"base64 0.22.1",
"bitflags 2.11.0",
"byteorder",
"chrono",
"crc",
"dotenvy",
"etcetera",
"futures-channel",
"futures-core",
"futures-util",
"hex",
"hkdf",
"hmac",
"home",
"itoa",
"log",
"md-5",
"memchr",
"once_cell",
"rand 0.8.5",
"serde",
"serde_json",
"sha2",
"smallvec",
"sqlx-core",
"stringprep",
"thiserror 2.0.18",
"tracing",
"uuid",
"whoami",
]
[[package]]
name = "sqlx-sqlite"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea"
dependencies = [
"atoi",
"chrono",
"flume",
"futures-channel",
"futures-core",
"futures-executor",
"futures-intrusive",
"futures-util",
"libsqlite3-sys",
"log",
"percent-encoding",
"serde",
"serde_urlencoded",
"sqlx-core",
"thiserror 2.0.18",
"tracing",
"url",
"uuid",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "stringprep"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1"
dependencies = [
"unicode-bidi",
"unicode-normalization",
"unicode-properties",
]
[[package]]
name = "strsim"
version = "0.11.1"
@@ -3920,12 +4319,33 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
[[package]]
name = "unicode-bidi"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-normalization"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-properties"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
[[package]]
name = "unicode-xid"
version = "0.2.6"
@@ -3977,6 +4397,7 @@ checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37"
dependencies = [
"getrandom 0.4.2",
"js-sys",
"serde_core",
"wasm-bindgen",
]
@@ -3986,6 +4407,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version_check"
version = "0.9.5"
@@ -4025,6 +4452,12 @@ dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasite"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b"
[[package]]
name = "wasm-bindgen"
version = "0.2.114"
@@ -4144,6 +4577,15 @@ version = "0.25.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1"
[[package]]
name = "webpki-roots"
version = "0.26.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
dependencies = [
"webpki-roots 1.0.6",
]
[[package]]
name = "webpki-roots"
version = "1.0.6"
@@ -4153,6 +4595,16 @@ dependencies = [
"rustls-pki-types",
]
[[package]]
name = "whoami"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d"
dependencies = [
"libredox",
"wasite",
]
[[package]]
name = "winapi"
version = "0.3.9"

View File

@@ -4,11 +4,13 @@ members = [
"furumi-server",
"furumi-client-core",
"furumi-mount-linux",
"furumi-mount-macos"
"furumi-mount-macos",
"furumi-agent",
]
default-members = [
"furumi-common",
"furumi-server",
"furumi-client-core",
"furumi-agent",
]
resolver = "2"

23
furumi-agent/Cargo.toml Normal file
View File

@@ -0,0 +1,23 @@
# Manifest for the furumi-agent crate: music metadata ingest and
# LLM-assisted normalization service.
[package]
name = "furumi-agent"
version = "0.3.4"
edition = "2024"

[dependencies]
# Application-level error handling.
anyhow = "1.0"
# Content hashing of ingested audio files.
blake3 = "1"
chrono = { version = "0.4", features = ["serde"] }
# CLI/env configuration (see src/config.rs Args derive).
clap = { version = "4.5", features = ["derive", "env"] }
# Recovery of mojibake / legacy text encodings in tags.
encoding_rs = "0.8"
# HTTP client for the Ollama API; rustls avoids an OpenSSL build dependency.
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
# PostgreSQL access with compile-time-embedded migrations.
sqlx = { version = "0.8", features = ["runtime-tokio-rustls", "postgres", "chrono", "uuid", "migrate"] }
# Audio demuxing/decoding for duration and codec probing of common formats.
symphonia = { version = "0.5", default-features = false, features = ["mp3", "aac", "flac", "vorbis", "wav", "alac", "adpcm", "pcm", "mpa", "isomp4", "ogg", "aiff", "mkv"] }
thiserror = "2.0"
tokio = { version = "1.50", features = ["full"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# Admin web UI.
axum = { version = "0.7", features = ["tokio", "macros"] }
tower = { version = "0.4", features = ["util"] }
uuid = { version = "1", features = ["v4", "serde"] }

View File

@@ -0,0 +1,86 @@
-- Initial schema for the furumi-agent metadata database.

-- Trigram extension: provides the gin_trgm_ops operator class used by the
-- fuzzy-name indexes at the bottom of this file.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Canonical artists; names are globally unique.
CREATE TABLE artists (
id BIGSERIAL PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Albums are unique per (artist, name); deleting an artist removes their albums.
CREATE TABLE albums (
id BIGSERIAL PRIMARY KEY,
artist_id BIGINT NOT NULL REFERENCES artists(id) ON DELETE CASCADE,
name TEXT NOT NULL,
year INT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (artist_id, name)
);

-- Fully ingested tracks. album_id is nullable (single/untagged tracks) and is
-- cleared rather than cascaded when an album is deleted.
CREATE TABLE tracks (
id BIGSERIAL PRIMARY KEY,
artist_id BIGINT NOT NULL REFERENCES artists(id) ON DELETE CASCADE,
album_id BIGINT REFERENCES albums(id) ON DELETE SET NULL,
title TEXT NOT NULL,
track_number INT,
genre TEXT,
duration_secs DOUBLE PRECISION,
codec TEXT,
bitrate INT,
sample_rate INT,
-- Content hash; UNIQUE so the same file cannot be ingested twice.
file_hash TEXT NOT NULL UNIQUE,
file_size BIGINT NOT NULL,
storage_path TEXT NOT NULL,
-- Set when a human edited the metadata; presumably protects the row from
-- being overwritten by later automated passes (confirm against agent code).
manual_override BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Work queue for files found in the inbox. Column groups:
--   raw_*  : metadata read from the file's tags
--   path_* : hints parsed from the file path/name
--   norm_* : normalized output produced by the LLM
CREATE TABLE pending_tracks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
status TEXT NOT NULL DEFAULT 'pending',
inbox_path TEXT NOT NULL,
file_hash TEXT NOT NULL,
file_size BIGINT NOT NULL,
raw_title TEXT,
raw_artist TEXT,
raw_album TEXT,
raw_year INT,
raw_track_number INT,
raw_genre TEXT,
duration_secs DOUBLE PRECISION,
path_title TEXT,
path_artist TEXT,
path_album TEXT,
path_year INT,
path_track_number INT,
norm_title TEXT,
norm_artist TEXT,
norm_album TEXT,
norm_year INT,
norm_track_number INT,
norm_genre TEXT,
-- JSON array of featured-artist names, stored as TEXT.
norm_featured_artists TEXT,
-- LLM self-reported confidence in [0.0, 1.0].
confidence DOUBLE PRECISION,
llm_notes TEXT,
error_message TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Many-to-many link for guest/featured performers on a track.
CREATE TABLE track_artists (
id BIGSERIAL PRIMARY KEY,
track_id BIGINT NOT NULL REFERENCES tracks(id) ON DELETE CASCADE,
artist_id BIGINT NOT NULL REFERENCES artists(id) ON DELETE CASCADE,
role TEXT NOT NULL DEFAULT 'featured',
UNIQUE (track_id, artist_id, role)
);

-- Indexes
-- Trigram GIN indexes back the similarity() fuzzy-match queries.
CREATE INDEX idx_artists_name_trgm ON artists USING gin (name gin_trgm_ops);
CREATE INDEX idx_albums_name_trgm ON albums USING gin (name gin_trgm_ops);
CREATE INDEX idx_tracks_file_hash ON tracks (file_hash);
CREATE INDEX idx_pending_status ON pending_tracks (status);
CREATE INDEX idx_pending_file_hash ON pending_tracks (file_hash);
CREATE INDEX idx_track_artists_track ON track_artists (track_id);
CREATE INDEX idx_track_artists_artist ON track_artists (artist_id);

View File

@@ -0,0 +1,37 @@
-- Add slug (public unique ID) to tracks
ALTER TABLE tracks ADD COLUMN slug TEXT;
-- Generate slugs for existing tracks.
-- Strip the dashes from a random UUID to get its canonical 32-char hex form.
-- (The previous encode(gen_random_uuid()::text::bytea, 'hex') hex-encoded the
-- *textual* UUID — a double encoding yielding unwieldy 72-char slugs.)
UPDATE tracks SET slug = replace(gen_random_uuid()::text, '-', '') WHERE slug IS NULL;
ALTER TABLE tracks ALTER COLUMN slug SET NOT NULL;
CREATE UNIQUE INDEX idx_tracks_slug ON tracks (slug);

-- Add slug to albums (same scheme as tracks).
ALTER TABLE albums ADD COLUMN slug TEXT;
UPDATE albums SET slug = replace(gen_random_uuid()::text, '-', '') WHERE slug IS NULL;
ALTER TABLE albums ALTER COLUMN slug SET NOT NULL;
CREATE UNIQUE INDEX idx_albums_slug ON albums (slug);

-- Add slug to artists (same scheme as tracks).
ALTER TABLE artists ADD COLUMN slug TEXT;
UPDATE artists SET slug = replace(gen_random_uuid()::text, '-', '') WHERE slug IS NULL;
ALTER TABLE artists ALTER COLUMN slug SET NOT NULL;
CREATE UNIQUE INDEX idx_artists_slug ON artists (slug);

-- Album artwork table
CREATE TABLE album_images (
id BIGSERIAL PRIMARY KEY,
album_id BIGINT NOT NULL REFERENCES albums(id) ON DELETE CASCADE,
image_type TEXT NOT NULL DEFAULT 'cover', -- 'cover', 'back', 'booklet', 'other'
file_path TEXT NOT NULL, -- relative path in storage
file_hash TEXT NOT NULL,
mime_type TEXT NOT NULL,
width INT,
height INT,
file_size BIGINT NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_album_images_album ON album_images (album_id);
-- One row per distinct image file, shared-content dedup by hash.
CREATE UNIQUE INDEX idx_album_images_hash ON album_images (file_hash);

View File

@@ -0,0 +1,80 @@
You are a music metadata normalization assistant. Your job is to take raw metadata extracted from audio files and produce clean, accurate, canonical metadata suitable for a music library database.
## Rules
1. **Artist names** must use correct capitalization and canonical spelling. Examples:
- "pink floyd" → "Pink Floyd"
- "AC DC" → "AC/DC"
- "Guns n roses" → "Guns N' Roses"
- "Led zepplin" → "Led Zeppelin" (fix common misspellings)
- "саша скул" → "Саша Скул" (fix capitalization, keep the language as-is)
- If the database already contains a matching artist (same name in any case or transliteration), always use the existing canonical name exactly. For example, if the DB has "Саша Скул" and the file says "саша скул" or "Sasha Skul", use "Саша Скул".
- **Compound artist fields**: When the artist field or path contains multiple artist names joined by "и", "and", "&", "/", ",", "x", or "vs", you MUST split them. The "artist" field must contain ONLY ONE primary artist. All others go into "featured_artists". If one of the names already exists in the database, prefer that one as the primary artist.
- Examples:
- Artist or path: "Саша Скул и Олег Харитонов" with DB containing "Саша Скул" → artist: "Саша Скул", featured_artists: ["Олег Харитонов"]
- Artist: "Metallica & Lou Reed" with DB containing "Metallica" → artist: "Metallica", featured_artists: ["Lou Reed"]
- Artist: "Artist A / Artist B" with neither in DB → artist: "Artist A", featured_artists: ["Artist B"] (first listed = primary)
- **NEVER create a new compound artist** like "X и Y" or "X & Y" as a single artist name. Always split into primary + featured.
2. **Featured artists**: Many tracks include collaborations. Guest artists can be indicated by ANY of the following markers (case-insensitive) in the artist field, track title, filename, or path:
- English: "feat.", "ft.", "featuring", "with"
- Russian: "п.у.", "при участии"
- Parenthetical: "(feat. X)", "(ft. X)", "(п.у. X)", "(при участии X)"
- Any other language-specific equivalent indicating a guest/featured collaboration
You must:
- Extract the **primary artist** (the main performer) into the "artist" field.
- Extract ALL **featured/guest artists** into a separate "featured_artists" array.
- Remove the collaboration marker and featured artist names from the track title, keeping only the song name.
- When multiple featured artists are listed, split them by commas or "&" into separate entries.
- Examples:
- Artist: "НСМВГЛП feat. XACV SQUAD" → artist: "НСМВГЛП", featured_artists: ["XACV SQUAD"]
- Title: "Знаешь ли ты feat. SharOn" → title: "Знаешь ли ты", featured_artists: ["SharOn"]
- Title: "Ваши мамки (п.у. Ваня Айван,Иван Смех, Жильцов)" → title: "Ваши мамки", featured_artists: ["Ваня Айван", "Иван Смех", "Жильцов"]
- Title: "Молоды (п.у. Паша Батруха)" → title: "Молоды", featured_artists: ["Паша Батруха"]
- Title: "Повелитель Мух (п.у. Пикуль)" → title: "Повелитель Мух", featured_artists: ["Пикуль"]
- Artist: "A & B ft. C, D" → artist: "A & B", featured_artists: ["C", "D"] (keep "A & B" as a single artist only when it is an established act already present in the database as one entry; otherwise split it per the compound-artist rule above)
- **IMPORTANT**: Always check for parenthetical markers like "(п.у. ...)" or "(feat. ...)" at the end of track titles. These are very common and must not be missed.
- Apply the same capitalization and consistency rules to featured artist names.
- If the database already contains a matching featured artist name, use the existing canonical form.
3. **Album names** must use correct capitalization and canonical spelling.
- Use title case for English albums.
- Preserve original language for non-English albums.
- If the database already contains a matching album under the same artist, use the existing name exactly.
- Do not alter the creative content of album names (same principle as track titles).
4. **Track titles** must use correct capitalization, but their content must be preserved exactly.
- Use title case for English titles.
- Preserve original language for non-English titles.
- Remove leading track numbers if present (e.g., "01 - Have a Cigar" → "Have a Cigar").
- **NEVER remove, add, or alter words, numbers, suffixes, punctuation marks, or special characters in titles.** Your job is to fix capitalization and encoding, not to edit the creative content. If a title contains unusual punctuation, numbers, apostrophes, or symbols — they are intentional and must be kept as-is.
- If all tracks in the same album follow a naming pattern (e.g., numbered names like "Part 1", "Part 2"), preserve that pattern consistently. Do not simplify or truncate individual track names.
5. **Year**: If not present in tags, try to infer from the file path. Only set a year if you are confident it is correct.
6. **Track number**: If not present in tags, try to infer from the filename (e.g., "03 - Song.flac" → track 3).
7. **Genre**: Normalize to a common genre name. Avoid overly specific sub-genres unless the existing database already uses them.
8. **Encoding issues**: Raw metadata may contain mojibake (e.g., Cyrillic text misread as Latin-1). If you detect garbled text that looks like encoding errors, attempt to determine the intended text.
9. **Preservation principle**: When in doubt, preserve the original value. Only change metadata when you are confident the change is a correction (e.g., fixing capitalization, fixing encoding, matching to an existing DB entry). Do not "clean up" or "simplify" values that look unusual — artists often use unconventional naming intentionally.
10. **Consistency**: When the database already contains entries for an artist or album, your output MUST match the existing canonical names. Do not introduce new variations.
11. **Confidence**: Rate your confidence from 0.0 to 1.0.
- 1.0: All fields are clear and unambiguous.
- 0.8+: Minor inferences made (e.g., year from path), but high certainty.
- 0.5-0.8: Some guesswork involved, human review recommended.
- Below 0.5: Significant uncertainty, definitely needs review.
## Response format
You MUST respond with a single JSON object, no markdown fences, no extra text:
{"artist": "...", "album": "...", "title": "...", "year": 2000, "track_number": 1, "genre": "...", "featured_artists": ["...", "..."], "confidence": 0.95, "notes": "brief explanation of changes made"}
- Use null for fields you cannot determine.
- Use an empty array [] for "featured_artists" if there are no featured artists.
- The "notes" field should briefly explain what you changed and why.

View File

@@ -0,0 +1,75 @@
use std::path::PathBuf;
use clap::Parser;
/// Default system prompt, compiled into the binary as a fallback.
// `include_str!` resolves relative to this source file at build time, so the
// prompt ships inside the binary and needs no file access at runtime.
const DEFAULT_SYSTEM_PROMPT: &str = include_str!("../prompts/normalize.txt");
// Runtime configuration, populated by clap from CLI flags and/or the
// FURUMI_AGENT_* environment variables.
//
// NOTE: the `///` doc comments below double as the clap-generated --help
// text, so their wording is user-facing; they are kept verbatim.
#[derive(Parser, Debug)]
#[command(version, about = "Furumi Agent: music metadata ingest and normalization")]
pub struct Args {
    /// IP address and port for the admin web UI
    #[arg(long, env = "FURUMI_AGENT_BIND", default_value = "0.0.0.0:8090")]
    pub bind: String,
    /// Directory to watch for new music files
    #[arg(long, env = "FURUMI_AGENT_INBOX_DIR")]
    pub inbox_dir: PathBuf,
    /// Directory for permanently stored and organized music files
    #[arg(long, env = "FURUMI_AGENT_STORAGE_DIR")]
    pub storage_dir: PathBuf,
    /// PostgreSQL connection URL
    #[arg(long, env = "FURUMI_AGENT_DATABASE_URL")]
    pub database_url: String,
    /// Ollama API base URL
    #[arg(long, env = "FURUMI_AGENT_OLLAMA_URL", default_value = "http://localhost:11434")]
    pub ollama_url: String,
    /// Ollama model name
    #[arg(long, env = "FURUMI_AGENT_OLLAMA_MODEL", default_value = "qwen3:14b")]
    pub ollama_model: String,
    /// Inbox scan interval in seconds
    #[arg(long, env = "FURUMI_AGENT_POLL_INTERVAL_SECS", default_value_t = 30)]
    pub poll_interval_secs: u64,
    /// Confidence threshold for auto-approval (0.0 - 1.0)
    // Range is enforced by Args::validate, not by clap.
    #[arg(long, env = "FURUMI_AGENT_CONFIDENCE_THRESHOLD", default_value_t = 0.85)]
    pub confidence_threshold: f64,
    /// Path to a custom system prompt file (overrides the built-in default)
    #[arg(long, env = "FURUMI_AGENT_SYSTEM_PROMPT_FILE")]
    pub system_prompt_file: Option<PathBuf>,
}
impl Args {
    /// Validate runtime configuration that clap cannot check on its own.
    ///
    /// # Errors
    /// Returns an error if the inbox or storage path is missing / not a
    /// directory, or if `confidence_threshold` is outside `[0.0, 1.0]`.
    pub fn validate(&self) -> Result<(), Box<dyn std::error::Error>> {
        // Deduplicate the two identical directory checks via a helper.
        Self::ensure_dir(&self.inbox_dir, "Inbox")?;
        Self::ensure_dir(&self.storage_dir, "Storage")?;
        if !(0.0..=1.0).contains(&self.confidence_threshold) {
            return Err("Confidence threshold must be between 0.0 and 1.0".into());
        }
        Ok(())
    }

    /// Error unless `path` exists and is a directory.
    // `Path::is_dir` already returns false for nonexistent paths, so a
    // separate `exists()` check is redundant.
    fn ensure_dir(path: &std::path::Path, label: &str) -> Result<(), Box<dyn std::error::Error>> {
        if !path.is_dir() {
            return Err(format!("{label} directory {path:?} does not exist or is not a directory").into());
        }
        Ok(())
    }

    /// Load the system prompt from a custom file or use the built-in default.
    ///
    /// # Errors
    /// Propagates I/O errors when a custom prompt file is configured but
    /// cannot be read.
    pub fn load_system_prompt(&self) -> Result<String, Box<dyn std::error::Error>> {
        match &self.system_prompt_file {
            Some(path) => {
                tracing::info!("Loading system prompt from {:?}", path);
                Ok(std::fs::read_to_string(path)?)
            }
            None => {
                tracing::info!("Using built-in default system prompt");
                Ok(DEFAULT_SYSTEM_PROMPT.to_owned())
            }
        }
    }
}

554
furumi-agent/src/db.rs Normal file
View File

@@ -0,0 +1,554 @@
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use sqlx::postgres::PgPoolOptions;
use uuid::Uuid;
/// Generate a short URL-safe slug from a UUID v4.
///
/// Takes the dash-free ("simple") hex rendering of a fresh random UUID and
/// keeps its first 12 hex characters (48 bits of randomness).
fn generate_slug() -> String {
    let hex = Uuid::new_v4().simple().to_string();
    hex.chars().take(12).collect()
}
/// Open a PostgreSQL connection pool (capped at 5 connections) for the
/// given database URL.
///
/// # Errors
/// Returns the underlying `sqlx::Error` if the connection cannot be
/// established.
pub async fn connect(database_url: &str) -> Result<PgPool, sqlx::Error> {
    let options = PgPoolOptions::new().max_connections(5);
    options.connect(database_url).await
}
/// Apply any pending SQL migrations to the database.
// `sqlx::migrate!` embeds the contents of ./migrations at compile time, so
// the directory is needed at build time only, not at runtime.
pub async fn migrate(pool: &PgPool) -> Result<(), sqlx::migrate::MigrateError> {
    sqlx::migrate!("./migrations").run(pool).await
}
// --- Models ---

/// A canonical artist row (id + name projection of the `artists` table).
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct Artist {
    pub id: i64,
    pub name: String,
}

/// An album row; `year` is optional in the schema.
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct Album {
    pub id: i64,
    pub artist_id: i64,
    pub name: String,
    pub year: Option<i32>,
}

/// A row of the `pending_tracks` work queue: a file found in the inbox that
/// is progressing through hashing, tag extraction, and LLM normalization.
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct PendingTrack {
    pub id: Uuid,
    // Lifecycle state, e.g. 'pending' at insert time (see insert_pending).
    pub status: String,
    pub inbox_path: String,
    pub file_hash: String,
    pub file_size: i64,
    // Raw metadata from file tags
    pub raw_title: Option<String>,
    pub raw_artist: Option<String>,
    pub raw_album: Option<String>,
    pub raw_year: Option<i32>,
    pub raw_track_number: Option<i32>,
    pub raw_genre: Option<String>,
    pub duration_secs: Option<f64>,
    // Path-derived hints
    pub path_artist: Option<String>,
    pub path_album: Option<String>,
    pub path_year: Option<i32>,
    pub path_track_number: Option<i32>,
    pub path_title: Option<String>,
    // Normalized (LLM output)
    pub norm_title: Option<String>,
    pub norm_artist: Option<String>,
    pub norm_album: Option<String>,
    pub norm_year: Option<i32>,
    pub norm_track_number: Option<i32>,
    pub norm_genre: Option<String>,
    // JSON array serialized to a string (see update_pending_normalized).
    pub norm_featured_artists: Option<String>, // JSON array
    pub confidence: Option<f64>,
    pub llm_notes: Option<String>,
    pub error_message: Option<String>,
    pub created_at: chrono::DateTime<chrono::Utc>,
    pub updated_at: chrono::DateTime<chrono::Utc>,
}

/// Fuzzy-match result for an artist name lookup; `similarity` is the
/// pg_trgm score reported by the query.
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct SimilarArtist {
    pub id: i64,
    pub name: String,
    pub similarity: f32,
}

/// Fuzzy-match result for an album name lookup.
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct SimilarAlbum {
    pub id: i64,
    pub artist_id: i64,
    pub name: String,
    pub year: Option<i32>,
    pub similarity: f32,
}

/// A row of the `album_images` artwork table.
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct AlbumImage {
    pub id: i64,
    pub album_id: i64,
    // e.g. 'cover', 'back', 'booklet', 'other' (per the migration comment).
    pub image_type: String,
    pub file_path: String,
    pub file_hash: String,
    pub mime_type: String,
    pub width: Option<i32>,
    pub height: Option<i32>,
    pub file_size: i64,
}
// --- Queries ---
/// Returns true when a file with this content hash is already known:
/// either fully ingested into `tracks`, or sitting in `pending_tracks`
/// in any non-terminal state (status other than 'rejected' / 'error').
pub async fn file_hash_exists(pool: &PgPool, hash: &str) -> Result<bool, sqlx::Error> {
    let (exists,): (bool,) = sqlx::query_as(
        "SELECT EXISTS(SELECT 1 FROM tracks WHERE file_hash = $1) OR EXISTS(SELECT 1 FROM pending_tracks WHERE file_hash = $1 AND status NOT IN ('rejected', 'error'))"
    )
    .bind(hash)
    .fetch_one(pool)
    .await?;
    Ok(exists)
}
/// Insert a newly discovered inbox file as a `pending_tracks` row with
/// status 'pending' and return the generated row id.
///
/// `raw` carries metadata read from the file's tags and `path_hints` carries
/// values inferred from the file path; both are stored verbatim so the
/// normalization step can weigh them later.
// NOTE(review): `RawFields` / `PathHints` are declared elsewhere in this
// module; fields are assumed to mirror the pending_tracks columns bound below.
pub async fn insert_pending(
    pool: &PgPool,
    inbox_path: &str,
    file_hash: &str,
    file_size: i64,
    raw: &RawFields,
    path_hints: &PathHints,
    duration_secs: Option<f64>,
) -> Result<Uuid, sqlx::Error> {
    let row: (Uuid,) = sqlx::query_as(
        r#"INSERT INTO pending_tracks
    (inbox_path, file_hash, file_size,
    raw_title, raw_artist, raw_album, raw_year, raw_track_number, raw_genre,
    path_title, path_artist, path_album, path_year, path_track_number,
    duration_secs, status)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, 'pending')
    RETURNING id"#,
    )
    // Bind order is positional and must match $1..$15 in the statement above.
    .bind(inbox_path)
    .bind(file_hash)
    .bind(file_size)
    .bind(&raw.title)
    .bind(&raw.artist)
    .bind(&raw.album)
    .bind(raw.year)
    .bind(raw.track_number)
    .bind(&raw.genre)
    .bind(&path_hints.title)
    .bind(&path_hints.artist)
    .bind(&path_hints.album)
    .bind(path_hints.year)
    .bind(path_hints.track_number)
    .bind(duration_secs)
    .fetch_one(pool)
    .await?;
    Ok(row.0)
}
/// Write the LLM normalization result back onto a pending row, together with
/// its new status and (optionally) an error message. Bumps `updated_at`.
pub async fn update_pending_normalized(
    pool: &PgPool,
    id: Uuid,
    status: &str,
    norm: &NormalizedFields,
    error_message: Option<&str>,
) -> Result<(), sqlx::Error> {
    // Featured artists are stored as a JSON array string; an empty list becomes
    // NULL rather than "[]".
    let featured_json = if norm.featured_artists.is_empty() {
        None
    } else {
        Some(serde_json::to_string(&norm.featured_artists).unwrap_or_default())
    };
    sqlx::query(
        r#"UPDATE pending_tracks SET
        status = $2,
        norm_title = $3, norm_artist = $4, norm_album = $5,
        norm_year = $6, norm_track_number = $7, norm_genre = $8,
        norm_featured_artists = $9,
        confidence = $10, llm_notes = $11, error_message = $12,
        updated_at = NOW()
        WHERE id = $1"#,
    )
    .bind(id)
    .bind(status)
    .bind(&norm.title)
    .bind(&norm.artist)
    .bind(&norm.album)
    .bind(norm.year)
    .bind(norm.track_number)
    .bind(&norm.genre)
    .bind(&featured_json)
    .bind(norm.confidence)
    .bind(&norm.notes)
    .bind(error_message)
    .execute(pool)
    .await?;
    Ok(())
}
/// Update only the status (and optional error message) of a pending row,
/// bumping `updated_at`.
pub async fn update_pending_status(
    pool: &PgPool,
    id: Uuid,
    status: &str,
    error_message: Option<&str>,
) -> Result<(), sqlx::Error> {
    let stmt = sqlx::query(
        "UPDATE pending_tracks SET status = $2, error_message = $3, updated_at = NOW() WHERE id = $1",
    )
    .bind(id)
    .bind(status)
    .bind(error_message);
    stmt.execute(pool).await.map(|_| ())
}
/// Fuzzy-match existing artist names (for LLM/RAG context and cover linking).
///
/// Queries shorter than 3 chars cannot form trigrams, so they fall back to an
/// ILIKE prefix search with a fixed 1.0 score. Longer queries combine trigram
/// matches (`%` operator) with a low-scored (0.01) substring catch-all and
/// keep the best score per artist.
pub async fn find_similar_artists(pool: &PgPool, name: &str, limit: i32) -> Result<Vec<SimilarArtist>, sqlx::Error> {
    // pg_trgm needs at least 3 chars to produce trigrams; for shorter queries use ILIKE prefix
    if name.chars().count() < 3 {
        sqlx::query_as::<_, SimilarArtist>(
            "SELECT id, name, 1.0::real AS similarity FROM artists WHERE name ILIKE $1 || '%' ORDER BY name LIMIT $2"
        )
        .bind(name)
        .bind(limit)
        .fetch_all(pool)
        .await
    } else {
        // UNION of trigram hits and substring hits; GROUP BY + MAX dedupes an
        // artist that matches both ways, preferring its trigram score.
        sqlx::query_as::<_, SimilarArtist>(
            r#"SELECT id, name, MAX(sim) AS similarity FROM (
            SELECT id, name, similarity(name, $1) AS sim FROM artists WHERE name % $1
            UNION ALL
            SELECT id, name, 0.01::real AS sim FROM artists WHERE name ILIKE '%' || $1 || '%'
            ) sub GROUP BY id, name ORDER BY similarity DESC LIMIT $2"#
        )
        .bind(name)
        .bind(limit)
        .fetch_all(pool)
        .await
    }
}
/// Fuzzy-match existing album names via pg_trgm, best matches first.
pub async fn find_similar_albums(pool: &PgPool, name: &str, limit: i32) -> Result<Vec<SimilarAlbum>, sqlx::Error> {
    let query = sqlx::query_as::<_, SimilarAlbum>(
        "SELECT id, artist_id, name, year, similarity(name, $1) AS similarity FROM albums WHERE name % $1 ORDER BY similarity DESC LIMIT $2",
    );
    query.bind(name).bind(limit).fetch_all(pool).await
}
/// Insert an artist by name, or return the existing row's id.
///
/// The no-op `DO UPDATE SET name = EXCLUDED.name` makes `RETURNING id` yield a
/// row even on conflict (a plain DO NOTHING would return no row). A fresh slug
/// is generated per call but only persisted for new rows — the conflict path
/// never touches the slug column, so existing artists keep theirs.
pub async fn upsert_artist(pool: &PgPool, name: &str) -> Result<i64, sqlx::Error> {
    let slug = generate_slug();
    let row: (i64,) = sqlx::query_as(
        "INSERT INTO artists (name, slug) VALUES ($1, $2) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id"
    )
    .bind(name)
    .bind(&slug)
    .fetch_one(pool)
    .await?;
    Ok(row.0)
}
/// Insert an album under an artist, or return the existing row's id.
///
/// On conflict (same artist + name) the year is only overwritten when the new
/// value is non-NULL (`COALESCE(EXCLUDED.year, albums.year)`); the existing
/// slug is kept.
pub async fn upsert_album(pool: &PgPool, artist_id: i64, name: &str, year: Option<i32>) -> Result<i64, sqlx::Error> {
    let slug = generate_slug();
    let row: (i64,) = sqlx::query_as(
        r#"INSERT INTO albums (artist_id, name, year, slug)
        VALUES ($1, $2, $3, $4)
        ON CONFLICT (artist_id, name) DO UPDATE SET year = COALESCE(EXCLUDED.year, albums.year)
        RETURNING id"#
    )
    .bind(artist_id)
    .bind(name)
    .bind(year)
    .bind(&slug)
    .fetch_one(pool)
    .await?;
    Ok(row.0)
}
/// Insert a finalized track row and return its id.
///
/// `album_id` is optional (singles have no album). A fresh slug is generated
/// for every track. No conflict handling — callers are expected to have
/// deduplicated by `file_hash` beforehand.
pub async fn insert_track(
    pool: &PgPool,
    artist_id: i64,
    album_id: Option<i64>,
    title: &str,
    track_number: Option<i32>,
    genre: Option<&str>,
    duration_secs: Option<f64>,
    file_hash: &str,
    file_size: i64,
    storage_path: &str,
) -> Result<i64, sqlx::Error> {
    let slug = generate_slug();
    let row: (i64,) = sqlx::query_as(
        r#"INSERT INTO tracks
        (artist_id, album_id, title, track_number, genre, duration_secs, file_hash, file_size, storage_path, slug)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
        RETURNING id"#
    )
    .bind(artist_id)
    .bind(album_id)
    .bind(title)
    .bind(track_number)
    .bind(genre)
    .bind(duration_secs)
    .bind(file_hash)
    .bind(file_size)
    .bind(storage_path)
    .bind(&slug)
    .fetch_one(pool)
    .await?;
    Ok(row.0)
}
/// Link a track to an artist with a role (e.g. "primary", "featured").
/// Idempotent: a duplicate link is silently ignored.
pub async fn link_track_artist(pool: &PgPool, track_id: i64, artist_id: i64, role: &str) -> Result<(), sqlx::Error> {
    let stmt = sqlx::query(
        "INSERT INTO track_artists (track_id, artist_id, role) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING",
    )
    .bind(track_id)
    .bind(artist_id)
    .bind(role);
    stmt.execute(pool).await.map(|_| ())
}
/// Turn an approved pending row into permanent catalog rows: upsert the artist
/// (and album, if any), insert the track, link primary and featured artists,
/// then mark the pending row 'approved'. Returns the new track id.
///
/// NOTE(review): the steps run as separate statements, not inside a single
/// transaction — a failure partway through can leave an artist/album without
/// its track. Confirm whether that is acceptable for this pipeline.
pub async fn approve_and_finalize(
    pool: &PgPool,
    pending_id: Uuid,
    storage_path: &str,
) -> Result<i64, sqlx::Error> {
    let pt: PendingTrack = sqlx::query_as("SELECT * FROM pending_tracks WHERE id = $1")
        .bind(pending_id)
        .fetch_one(pool)
        .await?;
    // Fall back to placeholder names when the LLM produced nothing.
    let artist_name = pt.norm_artist.as_deref().unwrap_or("Unknown Artist");
    let artist_id = upsert_artist(pool, artist_name).await?;
    let album_id = match pt.norm_album.as_deref() {
        Some(album_name) => Some(upsert_album(pool, artist_id, album_name, pt.norm_year).await?),
        None => None,
    };
    let title = pt.norm_title.as_deref().unwrap_or("Unknown Title");
    let track_id = insert_track(
        pool,
        artist_id,
        album_id,
        title,
        pt.norm_track_number,
        pt.norm_genre.as_deref(),
        pt.duration_secs,
        &pt.file_hash,
        pt.file_size,
        storage_path,
    )
    .await?;
    // Link primary artist
    link_track_artist(pool, track_id, artist_id, "primary").await?;
    // Link featured artists (stored as a JSON array string; malformed JSON is
    // silently ignored).
    if let Some(featured_json) = &pt.norm_featured_artists {
        if let Ok(featured) = serde_json::from_str::<Vec<String>>(featured_json) {
            for feat_name in &featured {
                let feat_id = upsert_artist(pool, feat_name).await?;
                link_track_artist(pool, track_id, feat_id, "featured").await?;
            }
        }
    }
    update_pending_status(pool, pending_id, "approved", None).await?;
    Ok(track_id)
}
// --- Album images ---
/// True when an album image with this content hash is already stored.
pub async fn image_hash_exists(pool: &PgPool, hash: &str) -> Result<bool, sqlx::Error> {
    sqlx::query_as::<_, (bool,)>("SELECT EXISTS(SELECT 1 FROM album_images WHERE file_hash = $1)")
        .bind(hash)
        .fetch_one(pool)
        .await
        .map(|(exists,)| exists)
}
/// Insert an album image record, deduplicated by content hash.
///
/// Returns the id of the inserted row — or of the already-stored row when an
/// image with the same `file_hash` exists.
pub async fn insert_album_image(
    pool: &PgPool,
    album_id: i64,
    image_type: &str,
    file_path: &str,
    file_hash: &str,
    mime_type: &str,
    file_size: i64,
) -> Result<i64, sqlx::Error> {
    // Bug fix: `ON CONFLICT ... DO NOTHING RETURNING id` yields NO row on
    // conflict, so the previous `fetch_one` failed with RowNotFound whenever a
    // duplicate hash raced past the caller's `image_hash_exists` check. Use
    // fetch_optional and fall back to looking up the existing row.
    let inserted: Option<(i64,)> = sqlx::query_as(
        r#"INSERT INTO album_images (album_id, image_type, file_path, file_hash, mime_type, file_size)
        VALUES ($1, $2, $3, $4, $5, $6)
        ON CONFLICT (file_hash) DO NOTHING
        RETURNING id"#
    )
    .bind(album_id)
    .bind(image_type)
    .bind(file_path)
    .bind(file_hash)
    .bind(mime_type)
    .bind(file_size)
    .fetch_optional(pool)
    .await?;
    match inserted {
        Some((id,)) => Ok(id),
        None => {
            // Conflict path: return the id of the image that is already there.
            let (id,): (i64,) = sqlx::query_as("SELECT id FROM album_images WHERE file_hash = $1")
                .bind(file_hash)
                .fetch_one(pool)
                .await?;
            Ok(id)
        }
    }
}
/// All stored artwork rows for an album, ordered by image type.
pub async fn get_album_images(pool: &PgPool, album_id: i64) -> Result<Vec<AlbumImage>, sqlx::Error> {
    let query = sqlx::query_as::<_, AlbumImage>(
        "SELECT * FROM album_images WHERE album_id = $1 ORDER BY image_type",
    );
    query.bind(album_id).fetch_all(pool).await
}
/// Look up an album id by exact artist name + album name.
/// Used when linking covers to already-finalized albums.
pub async fn find_album_id(pool: &PgPool, artist_name: &str, album_name: &str) -> Result<Option<i64>, sqlx::Error> {
    let found: Option<(i64,)> = sqlx::query_as(
        r#"SELECT a.id FROM albums a
        JOIN artists ar ON a.artist_id = ar.id
        WHERE ar.name = $1 AND a.name = $2"#
    )
    .bind(artist_name)
    .bind(album_name)
    .fetch_optional(pool)
    .await?;
    Ok(found.map(|(id,)| id))
}
// --- DTOs for insert helpers ---
/// Metadata read from the audio file's own tags (see `ingest::metadata`).
#[derive(Debug, Default)]
pub struct RawFields {
    pub title: Option<String>,
    pub artist: Option<String>,
    pub album: Option<String>,
    pub year: Option<i32>,
    pub track_number: Option<i32>,
    pub genre: Option<String>,
}

/// Metadata guessed from the file's path inside the inbox
/// (e.g. `Artist/Album (Year)/NN - Title.ext`).
#[derive(Debug, Default)]
pub struct PathHints {
    pub title: Option<String>,
    pub artist: Option<String>,
    pub album: Option<String>,
    pub year: Option<i32>,
    pub track_number: Option<i32>,
}

/// The LLM's normalized view of a track, parsed from its JSON reply.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct NormalizedFields {
    pub title: Option<String>,
    pub artist: Option<String>,
    pub album: Option<String>,
    pub year: Option<i32>,
    pub track_number: Option<i32>,
    pub genre: Option<String>,
    // Missing key in the JSON deserializes to an empty list.
    #[serde(default)]
    pub featured_artists: Vec<String>,
    // Model's self-reported confidence; compared against the auto-approve threshold.
    pub confidence: Option<f64>,
    pub notes: Option<String>,
}
// --- Admin queries ---
/// Page through pending tracks (newest first), optionally filtered by status.
pub async fn list_pending(pool: &PgPool, status_filter: Option<&str>, limit: i64, offset: i64) -> Result<Vec<PendingTrack>, sqlx::Error> {
    // Two query variants because the bind list differs with/without the filter.
    match status_filter {
        Some(status) => {
            sqlx::query_as::<_, PendingTrack>(
                "SELECT * FROM pending_tracks WHERE status = $1 ORDER BY created_at DESC LIMIT $2 OFFSET $3"
            )
            .bind(status)
            .bind(limit)
            .bind(offset)
            .fetch_all(pool)
            .await
        }
        None => {
            sqlx::query_as::<_, PendingTrack>(
                "SELECT * FROM pending_tracks ORDER BY created_at DESC LIMIT $1 OFFSET $2"
            )
            .bind(limit)
            .bind(offset)
            .fetch_all(pool)
            .await
        }
    }
}
/// Fetch a single pending track by id, or `None` when it does not exist.
pub async fn get_pending(pool: &PgPool, id: Uuid) -> Result<Option<PendingTrack>, sqlx::Error> {
    let query = sqlx::query_as::<_, PendingTrack>("SELECT * FROM pending_tracks WHERE id = $1");
    query.bind(id).fetch_optional(pool).await
}
/// Delete a pending track row; `true` when a row was actually removed.
pub async fn delete_pending(pool: &PgPool, id: Uuid) -> Result<bool, sqlx::Error> {
    let outcome = sqlx::query("DELETE FROM pending_tracks WHERE id = $1")
        .bind(id)
        .execute(pool)
        .await?;
    Ok(outcome.rows_affected() > 0)
}
/// All artists, sorted by name.
pub async fn list_artists_all(pool: &PgPool) -> Result<Vec<Artist>, sqlx::Error> {
    let query = sqlx::query_as::<_, Artist>("SELECT id, name FROM artists ORDER BY name");
    query.fetch_all(pool).await
}
/// All albums of one artist, sorted by year then name.
pub async fn list_albums_by_artist(pool: &PgPool, artist_id: i64) -> Result<Vec<Album>, sqlx::Error> {
    let query = sqlx::query_as::<_, Album>(
        "SELECT id, artist_id, name, year FROM albums WHERE artist_id = $1 ORDER BY year, name",
    );
    query.bind(artist_id).fetch_all(pool).await
}
/// Rename an artist; `true` when the id matched a row.
pub async fn update_artist_name(pool: &PgPool, id: i64, name: &str) -> Result<bool, sqlx::Error> {
    let outcome = sqlx::query("UPDATE artists SET name = $2 WHERE id = $1")
        .bind(id)
        .bind(name)
        .execute(pool)
        .await?;
    Ok(outcome.rows_affected() > 0)
}
/// Update an album's name and year (year may be cleared by passing `None`);
/// `true` when the id matched a row.
pub async fn update_album(pool: &PgPool, id: i64, name: &str, year: Option<i32>) -> Result<bool, sqlx::Error> {
    let outcome = sqlx::query("UPDATE albums SET name = $2, year = $3 WHERE id = $1")
        .bind(id)
        .bind(name)
        .bind(year)
        .execute(pool)
        .await?;
    Ok(outcome.rows_affected() > 0)
}
/// Aggregate library counters returned by `get_stats` (admin dashboard).
#[derive(Debug, Serialize)]
pub struct Stats {
    pub total_tracks: i64,
    pub total_artists: i64,
    pub total_albums: i64,
    pub pending_count: i64, // pending_tracks with status = 'pending'
    pub review_count: i64,  // pending_tracks with status = 'review'
    pub error_count: i64,   // pending_tracks with status = 'error'
}
/// Collect library-wide counters for the admin dashboard.
///
/// A single statement with scalar subqueries replaces the previous six
/// sequential round trips: one network hop, and all counts come from the same
/// consistent snapshot.
pub async fn get_stats(pool: &PgPool) -> Result<Stats, sqlx::Error> {
    let (total_tracks, total_artists, total_albums, pending_count, review_count, error_count): (i64, i64, i64, i64, i64, i64) = sqlx::query_as(
        r#"SELECT
            (SELECT COUNT(*) FROM tracks),
            (SELECT COUNT(*) FROM artists),
            (SELECT COUNT(*) FROM albums),
            (SELECT COUNT(*) FROM pending_tracks WHERE status = 'pending'),
            (SELECT COUNT(*) FROM pending_tracks WHERE status = 'review'),
            (SELECT COUNT(*) FROM pending_tracks WHERE status = 'error')"#
    )
    .fetch_one(pool)
    .await?;
    Ok(Stats { total_tracks, total_artists, total_albums, pending_count, review_count, error_count })
}

View File

@@ -0,0 +1,129 @@
use std::path::Path;
use symphonia::core::{
codecs::CODEC_TYPE_NULL,
formats::FormatOptions,
io::MediaSourceStream,
meta::{MetadataOptions, StandardTagKey},
probe::Hint,
};
/// Tag values read straight from the audio file, before any normalization.
/// Every field is optional — files may carry no tags at all.
#[derive(Debug, Default)]
pub struct RawMetadata {
    pub title: Option<String>,
    pub artist: Option<String>,
    pub album: Option<String>,
    pub track_number: Option<u32>,
    // Parsed from the first 4 characters of the Date/OriginalDate tag.
    pub year: Option<u32>,
    pub genre: Option<String>,
    // Computed from frame count and time base, not from a tag.
    pub duration_secs: Option<f64>,
}
/// Extract metadata from an audio file using Symphonia.
/// Must be called from a blocking context (spawn_blocking).
///
/// Tag fields come from probe side-data first (e.g. ID3 read before the
/// container), with format-embedded tags as a fallback; duration is derived
/// from the first real track's frame count and time base.
pub fn extract(path: &Path) -> anyhow::Result<RawMetadata> {
    let file = std::fs::File::open(path)?;
    let mss = MediaSourceStream::new(Box::new(file), Default::default());
    // Give the probe the file extension as a format hint, when present.
    let mut hint = Hint::new();
    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
        hint.with_extension(ext);
    }
    let mut probed = symphonia::default::get_probe().format(
        &hint,
        mss,
        &FormatOptions { enable_gapless: false, ..Default::default() },
        &MetadataOptions::default(),
    )?;
    let mut meta = RawMetadata::default();
    // Check metadata side-data (e.g., ID3 tags probed before format)
    if let Some(rev) = probed.metadata.get().as_ref().and_then(|m| m.current()) {
        extract_tags(rev.tags(), &mut meta);
    }
    // Also check format-embedded metadata — only consulted when the side-data
    // yielded no title.
    if let Some(rev) = probed.format.metadata().current() {
        if meta.title.is_none() {
            extract_tags(rev.tags(), &mut meta);
        }
    }
    // Duration = frames * (numer / denom) of the first non-NULL-codec track.
    meta.duration_secs = probed
        .format
        .tracks()
        .iter()
        .find(|t| t.codec_params.codec != CODEC_TYPE_NULL)
        .and_then(|t| {
            let n_frames = t.codec_params.n_frames?;
            let tb = t.codec_params.time_base?;
            Some(n_frames as f64 * tb.numer as f64 / tb.denom as f64)
        });
    Ok(meta)
}
/// Copy recognized standard tags into `meta`; the first value seen per field
/// wins. Values are passed through `fix_encoding` to undo CP1251 mojibake.
fn extract_tags(tags: &[symphonia::core::meta::Tag], meta: &mut RawMetadata) {
    for tag in tags {
        let value = fix_encoding(tag.value.to_string());
        if let Some(key) = tag.std_key {
            match key {
                StandardTagKey::TrackTitle => {
                    if meta.title.is_none() {
                        meta.title = Some(value);
                    }
                }
                StandardTagKey::Artist | StandardTagKey::Performer => {
                    if meta.artist.is_none() {
                        meta.artist = Some(value);
                    }
                }
                StandardTagKey::Album => {
                    if meta.album.is_none() {
                        meta.album = Some(value);
                    }
                }
                StandardTagKey::TrackNumber => {
                    if meta.track_number.is_none() {
                        meta.track_number = value.parse().ok();
                    }
                }
                StandardTagKey::Date | StandardTagKey::OriginalDate => {
                    if meta.year.is_none() {
                        // Take the leading 4 CHARACTERS (e.g. "2021" from
                        // "2021-05-01"). The previous `value[..4.min(value.len())]`
                        // sliced by BYTES and panicked whenever byte 4 fell inside
                        // a multi-byte UTF-8 character in the date tag.
                        let prefix: String = value.chars().take(4).collect();
                        meta.year = prefix.parse().ok();
                    }
                }
                StandardTagKey::Genre => {
                    if meta.genre.is_none() {
                        meta.genre = Some(value);
                    }
                }
                _ => {}
            }
        }
    }
}
/// Heuristic to fix mojibake (CP1251 bytes interpreted as Latin-1/Windows-1252).
///
/// If every char of `s` fits in one byte and at least one is >= 0xC0 (the
/// Latin-1 accented range, which is where CP1251 places Cyrillic letters), the
/// bytes are re-decoded as WINDOWS-1251; otherwise `s` is returned unchanged.
///
/// NOTE(review): genuinely accented single-byte text (e.g. "Café") matches the
/// same pattern and would be transcoded too — confirm this trade-off is
/// intended for the target collections.
fn fix_encoding(s: String) -> String {
    // Project each char to a byte, keeping only code points <= U+00FF.
    let bytes: Vec<u8> = s.chars().map(|c| c as u32).filter(|&c| c <= 255).map(|c| c as u8).collect();
    // Any char above U+00FF means the text is already real Unicode — leave it.
    if bytes.len() != s.chars().count() {
        return s;
    }
    // No bytes in the 0xC0..=0xFF range: plain ASCII/Latin-1, nothing to fix.
    let has_mojibake = bytes.iter().any(|&b| b >= 0xC0);
    if !has_mojibake {
        return s;
    }
    // Fall back to the original on any decode error.
    let (decoded, _, errors) = encoding_rs::WINDOWS_1251.decode(&bytes);
    if errors {
        return s;
    }
    decoded.into_owned()
}

View File

@@ -0,0 +1,518 @@
pub mod metadata;
pub mod normalize;
pub mod path_hints;
pub mod mover;
use std::sync::Arc;
use std::time::Duration;
use crate::db;
use crate::web::AppState;
/// Ingest daemon entry point: scan the inbox at a fixed interval, forever.
/// Scan failures are logged and do not stop the loop.
pub async fn run(state: Arc<AppState>) {
    let period = Duration::from_secs(state.config.poll_interval_secs);
    tracing::info!("Ingest loop started, polling every {}s: {:?}", state.config.poll_interval_secs, state.config.inbox_dir);
    loop {
        let outcome = scan_inbox(&state).await;
        match outcome {
            Err(e) => tracing::error!(?e, "inbox scan failed"),
            Ok(count) if count > 0 => tracing::info!(count, "processed new files"),
            Ok(_) => {}
        }
        tokio::time::sleep(period).await;
    }
}
/// One pass over the inbox: recursively gather audio and artwork files, then
/// process them (audio first, so that cover linking can find the albums).
/// Returns how many files were successfully processed; per-file failures are
/// logged and skipped.
async fn scan_inbox(state: &Arc<AppState>) -> anyhow::Result<usize> {
    let mut count = 0;
    let mut audio_files = Vec::new();
    let mut image_files = Vec::new();
    collect_files(&state.config.inbox_dir, &mut audio_files, &mut image_files).await?;
    if !audio_files.is_empty() || !image_files.is_empty() {
        tracing::info!("Scan found {} audio file(s) and {} image(s) in inbox", audio_files.len(), image_files.len());
    }
    for file_path in &audio_files {
        match process_file(state, file_path).await {
            Ok(true) => count += 1,
            Ok(false) => tracing::debug!(path = ?file_path, "skipped (already known)"),
            Err(e) => tracing::warn!(?e, path = ?file_path, "failed to process file"),
        }
    }
    // Process cover images after audio (so albums exist in DB)
    for image_path in &image_files {
        match process_cover_image(state, image_path).await {
            Ok(true) => {
                tracing::info!(path = ?image_path, "Cover image processed");
                count += 1;
            }
            Ok(false) => tracing::debug!(path = ?image_path, "cover image skipped"),
            Err(e) => tracing::warn!(?e, path = ?image_path, "failed to process cover image"),
        }
    }
    Ok(count)
}
/// Recursively collect all audio files and image files under a directory.
/// Dotfiles and dot-directories are skipped. The recursive call is boxed
/// (`Box::pin`) because async recursion needs a sized future.
async fn collect_files(dir: &std::path::Path, audio: &mut Vec<std::path::PathBuf>, images: &mut Vec<std::path::PathBuf>) -> anyhow::Result<()> {
    let mut entries = tokio::fs::read_dir(dir).await?;
    while let Some(entry) = entries.next_entry().await? {
        let name = entry.file_name().to_string_lossy().into_owned();
        // Hidden entries (".DS_Store", ".sync", ...) are ignored entirely.
        if name.starts_with('.') {
            continue;
        }
        let ft = entry.file_type().await?;
        if ft.is_dir() {
            Box::pin(collect_files(&entry.path(), audio, images)).await?;
        } else if ft.is_file() {
            if is_audio_file(&name) {
                audio.push(entry.path());
            } else if is_cover_image(&name) {
                images.push(entry.path());
            }
        }
    }
    Ok(())
}
/// True when the name carries a recognized audio extension (case-insensitive).
fn is_audio_file(name: &str) -> bool {
    const AUDIO_EXTS: [&str; 13] = [
        "mp3", "flac", "ogg", "opus", "aac", "m4a", "wav", "ape", "wv", "wma", "tta", "aiff", "aif",
    ];
    let ext = name.rsplit('.').next().unwrap_or("").to_lowercase();
    AUDIO_EXTS.contains(&ext.as_str())
}
/// True when the file looks like album artwork: a known image extension AND a
/// conventional artwork stem such as "cover" or "folder" (case-insensitive).
fn is_cover_image(name: &str) -> bool {
    const IMAGE_EXTS: [&str; 6] = ["jpg", "jpeg", "png", "webp", "bmp", "gif"];
    const ARTWORK_STEMS: [&str; 14] = [
        "cover", "front", "folder", "back", "booklet", "inlay", "disc", "cd",
        "album", "artwork", "art", "scan", "thumb", "thumbnail",
    ];
    let ext = name.rsplit('.').next().unwrap_or("").to_lowercase();
    if !IMAGE_EXTS.contains(&ext.as_str()) {
        return false;
    }
    let stem = std::path::Path::new(name)
        .file_stem()
        .and_then(|s| s.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();
    ARTWORK_STEMS.contains(&stem.as_str())
}
/// Map an artwork file name to its image type; anything unrecognized is "cover".
fn classify_image(name: &str) -> &'static str {
    let stem = std::path::Path::new(name)
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("")
        .to_lowercase();
    if stem == "back" {
        "back"
    } else if stem == "booklet" || stem == "inlay" || stem == "scan" {
        "booklet"
    } else if stem == "disc" || stem == "cd" {
        "disc"
    } else {
        "cover"
    }
}
/// MIME type for a known image extension (case-insensitive), with an
/// octet-stream fallback for anything else.
fn mime_for_image(name: &str) -> &'static str {
    const TABLE: [(&str, &str); 6] = [
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("png", "image/png"),
        ("webp", "image/webp"),
        ("gif", "image/gif"),
        ("bmp", "image/bmp"),
    ];
    let ext = name.rsplit('.').next().unwrap_or("").to_lowercase();
    TABLE
        .iter()
        .find(|(e, _)| *e == ext)
        .map(|(_, mime)| *mime)
        .unwrap_or("application/octet-stream")
}
/// Ingest one audio file from the inbox end-to-end:
/// hash → dedup check → tag extraction → path hints → pending row →
/// fuzzy DB lookups (RAG context) → LLM normalization → auto-approve & move,
/// or review queue, or error status.
///
/// Returns Ok(true) when the file produced a new record, Ok(false) when it was
/// skipped as a duplicate.
async fn process_file(state: &Arc<AppState>, file_path: &std::path::Path) -> anyhow::Result<bool> {
    let filename = file_path.file_name().and_then(|n| n.to_str()).unwrap_or("?");
    tracing::info!(file = filename, "Processing new file: {:?}", file_path);
    // Compute file hash for dedup
    tracing::info!(file = filename, "Computing file hash...");
    let path_clone = file_path.to_path_buf();
    // Hashing reads the whole file into memory; done on the blocking pool.
    let (hash, file_size) = tokio::task::spawn_blocking(move || -> anyhow::Result<(String, i64)> {
        let data = std::fs::read(&path_clone)?;
        let hash = blake3::hash(&data).to_hex().to_string();
        let size = data.len() as i64;
        Ok((hash, size))
    })
    .await??;
    tracing::info!(file = filename, hash = &hash[..16], size = file_size, "File hashed");
    // Skip if already known
    if db::file_hash_exists(&state.pool, &hash).await? {
        tracing::info!(file = filename, "Skipping: file hash already exists in database");
        return Ok(false);
    }
    // Extract raw metadata (blocking Symphonia probe, off the async runtime)
    tracing::info!(file = filename, "Extracting metadata with Symphonia...");
    let path_for_meta = file_path.to_path_buf();
    let raw_meta = tokio::task::spawn_blocking(move || metadata::extract(&path_for_meta)).await??;
    tracing::info!(
        file = filename,
        artist = raw_meta.artist.as_deref().unwrap_or("-"),
        title = raw_meta.title.as_deref().unwrap_or("-"),
        album = raw_meta.album.as_deref().unwrap_or("-"),
        "Raw metadata extracted"
    );
    // Parse path hints relative to inbox dir
    let relative = file_path.strip_prefix(&state.config.inbox_dir).unwrap_or(file_path);
    let hints = path_hints::parse(relative);
    if hints.artist.is_some() || hints.album.is_some() || hints.year.is_some() {
        tracing::info!(
            file = filename,
            path_artist = hints.artist.as_deref().unwrap_or("-"),
            path_album = hints.album.as_deref().unwrap_or("-"),
            path_year = ?hints.year,
            "Path hints parsed"
        );
    }
    let inbox_path_str = file_path.to_string_lossy().to_string();
    // Insert pending record
    tracing::info!(file = filename, "Inserting pending track record...");
    let pending_id = db::insert_pending(
        &state.pool,
        &inbox_path_str,
        &hash,
        file_size,
        &db::RawFields {
            title: raw_meta.title.clone(),
            artist: raw_meta.artist.clone(),
            album: raw_meta.album.clone(),
            year: raw_meta.year.map(|y| y as i32),
            track_number: raw_meta.track_number.map(|t| t as i32),
            genre: raw_meta.genre.clone(),
        },
        &db::PathHints {
            title: hints.title.clone(),
            artist: hints.artist.clone(),
            album: hints.album.clone(),
            year: hints.year,
            track_number: hints.track_number,
        },
        raw_meta.duration_secs,
    )
    .await?;
    db::update_pending_status(&state.pool, pending_id, "processing", None).await?;
    // RAG: find similar entries in DB. Tags take priority over path hints.
    let artist_query = raw_meta.artist.as_deref()
        .or(hints.artist.as_deref())
        .unwrap_or("");
    let album_query = raw_meta.album.as_deref()
        .or(hints.album.as_deref())
        .unwrap_or("");
    tracing::info!(file = filename, "Searching database for similar artists/albums...");
    // Lookup failures degrade to empty context rather than aborting the file.
    let similar_artists = if !artist_query.is_empty() {
        db::find_similar_artists(&state.pool, artist_query, 5).await.unwrap_or_default()
    } else {
        Vec::new()
    };
    let similar_albums = if !album_query.is_empty() {
        db::find_similar_albums(&state.pool, album_query, 5).await.unwrap_or_default()
    } else {
        Vec::new()
    };
    if !similar_artists.is_empty() {
        let names: Vec<&str> = similar_artists.iter().map(|a| a.name.as_str()).collect();
        tracing::info!(file = filename, matches = ?names, "Found similar artists in DB");
    }
    if !similar_albums.is_empty() {
        let names: Vec<&str> = similar_albums.iter().map(|a| a.name.as_str()).collect();
        tracing::info!(file = filename, matches = ?names, "Found similar albums in DB");
    }
    // Call LLM for normalization
    tracing::info!(file = filename, model = %state.config.ollama_model, "Sending to LLM for normalization...");
    match normalize::normalize(state, &raw_meta, &hints, &similar_artists, &similar_albums).await {
        Ok(normalized) => {
            // Missing confidence counts as 0.0 → always goes to review.
            let confidence = normalized.confidence.unwrap_or(0.0);
            let status = if confidence >= state.config.confidence_threshold {
                "approved"
            } else {
                "review"
            };
            tracing::info!(
                file = filename,
                norm_artist = normalized.artist.as_deref().unwrap_or("-"),
                norm_title = normalized.title.as_deref().unwrap_or("-"),
                norm_album = normalized.album.as_deref().unwrap_or("-"),
                confidence,
                status,
                notes = normalized.notes.as_deref().unwrap_or("-"),
                "LLM normalization complete"
            );
            if !normalized.featured_artists.is_empty() {
                tracing::info!(
                    file = filename,
                    featured = ?normalized.featured_artists,
                    "Featured artists detected"
                );
            }
            db::update_pending_normalized(&state.pool, pending_id, status, &normalized, None).await?;
            // Auto-approve: move file to storage
            if status == "approved" {
                let artist = normalized.artist.as_deref().unwrap_or("Unknown Artist");
                let album = normalized.album.as_deref().unwrap_or("Unknown Album");
                let title = normalized.title.as_deref().unwrap_or("Unknown Title");
                let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("flac");
                let track_num = normalized.track_number.unwrap_or(0);
                // "NN - Title.ext" when a track number is known, "Title.ext" otherwise.
                let dest_filename = if track_num > 0 {
                    format!("{:02} - {}.{}", track_num, sanitize_filename(title), ext)
                } else {
                    format!("{}.{}", sanitize_filename(title), ext)
                };
                tracing::info!(
                    file = filename,
                    dest_artist = artist,
                    dest_album = album,
                    dest_filename = %dest_filename,
                    "Auto-approved, moving to storage..."
                );
                match mover::move_to_storage(
                    &state.config.storage_dir,
                    artist,
                    album,
                    &dest_filename,
                    file_path,
                )
                .await
                {
                    Ok(storage_path) => {
                        let rel_path = storage_path.to_string_lossy().to_string();
                        match db::approve_and_finalize(&state.pool, pending_id, &rel_path).await {
                            Ok(track_id) => {
                                tracing::info!(file = filename, track_id, storage = %rel_path, "Track finalized in database");
                            }
                            Err(e) => {
                                // File already moved; DB row stays unfinalized — only logged.
                                tracing::error!(file = filename, ?e, "Failed to finalize track in DB after move");
                            }
                        }
                    }
                    Err(e) => {
                        tracing::error!(file = filename, ?e, "Failed to move file to storage");
                        db::update_pending_status(&state.pool, pending_id, "error", Some(&e.to_string())).await?;
                    }
                }
            } else {
                tracing::info!(file = filename, confidence, "Sent to review queue (below threshold {})", state.config.confidence_threshold);
            }
        }
        Err(e) => {
            tracing::error!(file = filename, ?e, "LLM normalization failed");
            db::update_pending_status(&state.pool, pending_id, "error", Some(&e.to_string())).await?;
        }
    }
    Ok(true)
}
/// Process a cover image found in the inbox.
/// Uses path hints (Artist/Album/) to find the matching album in the DB,
/// then copies the image to the album's storage folder.
///
/// Returns Ok(true) when an image record was saved, Ok(false) when the image
/// was skipped (duplicate hash, no album folder, or no matching album).
async fn process_cover_image(state: &Arc<AppState>, image_path: &std::path::Path) -> anyhow::Result<bool> {
    let filename = image_path.file_name().and_then(|n| n.to_str()).unwrap_or("?");
    // Hash for dedup
    let path_clone = image_path.to_path_buf();
    let (hash, file_size) = tokio::task::spawn_blocking(move || -> anyhow::Result<(String, i64)> {
        let data = std::fs::read(&path_clone)?;
        let hash = blake3::hash(&data).to_hex().to_string();
        let size = data.len() as i64;
        Ok((hash, size))
    })
    .await??;
    if db::image_hash_exists(&state.pool, &hash).await? {
        return Ok(false);
    }
    // Derive artist/album from path hints
    let relative = image_path.strip_prefix(&state.config.inbox_dir).unwrap_or(image_path);
    let components: Vec<&str> = relative
        .components()
        .filter_map(|c| c.as_os_str().to_str())
        .collect();
    tracing::info!(file = filename, path = ?relative, components = components.len(), "Processing cover image");
    // Supported structures:
    //   Artist/Album/image.jpg (3+ components)
    //   Album/image.jpg (2 components — album dir + image)
    if components.len() < 2 {
        tracing::info!(file = filename, "Cover image not inside an album folder, skipping");
        return Ok(false);
    }
    // The directory directly containing the image is always the album hint
    let album_raw = components[components.len() - 2];
    let path_artist = if components.len() >= 3 {
        Some(components[components.len() - 3])
    } else {
        None
    };
    // Strip a trailing "(Year)" from the album directory name.
    let (album_name, _) = path_hints::parse_album_year_public(album_raw);
    tracing::info!(
        file = filename,
        path_artist = path_artist.unwrap_or("-"),
        album_hint = %album_name,
        "Looking up album in database..."
    );
    // Try to find album in DB — try with artist if available, then without
    let album_id = if let Some(artist) = path_artist {
        find_album_for_cover(&state.pool, artist, &album_name).await?
    } else {
        None
    };
    // If not found with artist, try fuzzy album name match across all artists
    let album_id = match album_id {
        Some(id) => Some(id),
        None => {
            let similar_albums = db::find_similar_albums(&state.pool, &album_name, 3).await.unwrap_or_default();
            if let Some(best) = similar_albums.first() {
                // Accept the best fuzzy match only above a 0.5 similarity floor.
                if best.similarity > 0.5 {
                    tracing::info!(file = filename, album = %best.name, similarity = best.similarity, "Matched album by fuzzy search");
                    Some(best.id)
                } else {
                    None
                }
            } else {
                None
            }
        }
    };
    let album_id = match album_id {
        Some(id) => id,
        None => {
            tracing::info!(
                file = filename,
                artist = path_artist.unwrap_or("-"),
                album = %album_name,
                "No matching album found in DB, skipping cover"
            );
            return Ok(false);
        }
    };
    // Determine image type and move to storage
    let image_type = classify_image(filename);
    let mime = mime_for_image(filename);
    // Get album's storage path from any track in that album
    let storage_dir_opt: Option<(String,)> = sqlx::query_as(
        "SELECT storage_path FROM tracks WHERE album_id = $1 LIMIT 1"
    )
    .bind(album_id)
    .fetch_optional(&state.pool)
    .await?;
    let album_storage_dir = match storage_dir_opt {
        Some((track_path,)) => {
            let p = std::path::Path::new(&track_path);
            match p.parent() {
                Some(dir) if dir.is_dir() => dir.to_path_buf(),
                _ => {
                    tracing::warn!(file = filename, track_path = %track_path, "Track storage path has no valid parent dir");
                    return Ok(false);
                }
            }
        }
        None => {
            tracing::info!(file = filename, album_id, "Album has no tracks in storage yet, skipping cover");
            return Ok(false);
        }
    };
    tracing::info!(file = filename, dest_dir = ?album_storage_dir, "Will copy cover to album storage dir");
    let dest = album_storage_dir.join(filename);
    if !dest.exists() {
        // Move or copy image (rename fails across filesystems; fall back to copy+delete)
        match tokio::fs::rename(image_path, &dest).await {
            Ok(()) => {}
            Err(_) => {
                tokio::fs::copy(image_path, &dest).await?;
                tokio::fs::remove_file(image_path).await?;
            }
        }
    }
    let dest_str = dest.to_string_lossy().to_string();
    db::insert_album_image(&state.pool, album_id, image_type, &dest_str, &hash, mime, file_size).await?;
    tracing::info!(
        file = filename,
        album_id,
        image_type,
        dest = %dest_str,
        "Album image saved"
    );
    Ok(true)
}
/// Find an album in DB matching the path-derived artist and album name.
/// Tries exact match, then fuzzy artist + exact album, then fuzzy artist +
/// fuzzy album.
async fn find_album_for_cover(pool: &sqlx::PgPool, path_artist: &str, album_name: &str) -> anyhow::Result<Option<i64>> {
    // Try exact match first
    if let Some(id) = db::find_album_id(pool, path_artist, album_name).await? {
        return Ok(Some(id));
    }
    // Try fuzzy artist, then exact or fuzzy album under that artist
    let similar_artists = db::find_similar_artists(pool, path_artist, 5).await.unwrap_or_default();
    // The fuzzy album query depends only on `album_name`, not on the candidate
    // artist — previously it was re-executed on every loop iteration. Fetch it
    // lazily (only if some artist passes the threshold) and reuse the result.
    let mut fuzzy_albums: Option<Vec<db::SimilarAlbum>> = None;
    for artist in &similar_artists {
        if artist.similarity < 0.3 {
            continue;
        }
        // Exact album under fuzzy artist
        if let Some(id) = db::find_album_id(pool, &artist.name, album_name).await? {
            return Ok(Some(id));
        }
        // Fuzzy album scoped to this artist's id
        if fuzzy_albums.is_none() {
            fuzzy_albums = Some(db::find_similar_albums(pool, album_name, 3).await.unwrap_or_default());
        }
        for album in fuzzy_albums.as_deref().unwrap_or(&[]) {
            if album.artist_id == artist.id && album.similarity > 0.4 {
                return Ok(Some(album.id));
            }
        }
    }
    Ok(None)
}
/// Remove characters that are unsafe for filenames.
///
/// Reserved characters map to '_' and surrounding whitespace is trimmed. The
/// reserved set now includes NUL, matching `sanitize_dir_name` in the mover
/// module (previously the two helpers disagreed on '\0').
fn sanitize_filename(name: &str) -> String {
    name.chars()
        .map(|c| match c {
            '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' | '\0' => '_',
            _ => c,
        })
        .collect::<String>()
        .trim()
        .to_owned()
}

View File

@@ -0,0 +1,54 @@
use std::path::{Path, PathBuf};
/// Move a file from inbox to the permanent storage directory.
///
/// Creates the directory structure: `storage_dir/artist/album/filename`
/// Returns the full path of the moved file.
///
/// If `rename` fails (cross-device), falls back to copy + remove.
pub async fn move_to_storage(
    storage_dir: &Path,
    artist: &str,
    album: &str,
    filename: &str,
    source: &Path,
) -> anyhow::Result<PathBuf> {
    let artist_dir = sanitize_dir_name(artist);
    let album_dir = sanitize_dir_name(album);
    let dest_dir = storage_dir.join(&artist_dir).join(&album_dir);
    tokio::fs::create_dir_all(&dest_dir).await?;
    let dest = dest_dir.join(filename);
    // Avoid overwriting existing files. Async check instead of the previous
    // blocking `Path::exists()` (which stalls the runtime thread), and I/O
    // errors during the check are propagated instead of being read as "absent".
    if tokio::fs::try_exists(&dest).await? {
        anyhow::bail!("Destination already exists: {:?}", dest);
    }
    // Try atomic rename first (same filesystem)
    match tokio::fs::rename(source, &dest).await {
        Ok(()) => {}
        Err(_) => {
            // Cross-device: copy then remove
            tokio::fs::copy(source, &dest).await?;
            tokio::fs::remove_file(source).await?;
        }
    }
    tracing::info!(from = ?source, to = ?dest, "moved file to storage");
    Ok(dest)
}
/// Remove characters that are unsafe for directory names: reserved characters
/// (including NUL) become '_', then surrounding whitespace and dots are trimmed.
fn sanitize_dir_name(name: &str) -> String {
    const RESERVED: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\0'];
    let cleaned: String = name
        .chars()
        .map(|c| if RESERVED.contains(&c) { '_' } else { c })
        .collect();
    cleaned.trim().trim_matches('.').to_owned()
}

View File

@@ -0,0 +1,216 @@
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use crate::db::{NormalizedFields, SimilarAlbum, SimilarArtist};
use crate::web::AppState;
use super::metadata::RawMetadata;
/// Assemble the full context message and ask Ollama to normalize the metadata,
/// returning the parsed structured reply.
pub async fn normalize(
    state: &Arc<AppState>,
    raw: &RawMetadata,
    hints: &crate::db::PathHints,
    similar_artists: &[SimilarArtist],
    similar_albums: &[SimilarAlbum],
) -> anyhow::Result<NormalizedFields> {
    let prompt = build_user_message(raw, hints, similar_artists, similar_albums);
    let reply = call_ollama(
        &state.config.ollama_url,
        &state.config.ollama_model,
        &state.system_prompt,
        &prompt,
    )
    .await?;
    parse_response(&reply)
}
/// Render the LLM user message: raw tag fields, path hints, and fuzzy DB
/// matches, each in its own markdown section. Absent fields are omitted.
fn build_user_message(
    raw: &RawMetadata,
    hints: &crate::db::PathHints,
    similar_artists: &[SimilarArtist],
    similar_albums: &[SimilarAlbum],
) -> String {
    // Local helpers so every optional field is emitted with one uniform call.
    fn quoted(out: &mut String, label: &str, value: &Option<String>) {
        if let Some(v) = value {
            out.push_str(&format!("{}: \"{}\"\n", label, v));
        }
    }
    fn plain<T: std::fmt::Display>(out: &mut String, label: &str, value: Option<T>) {
        if let Some(v) = value {
            out.push_str(&format!("{}: {}\n", label, v));
        }
    }
    let mut out = String::from("## Raw metadata from file tags\n");
    quoted(&mut out, "Title", &raw.title);
    quoted(&mut out, "Artist", &raw.artist);
    quoted(&mut out, "Album", &raw.album);
    plain(&mut out, "Year", raw.year);
    plain(&mut out, "Track number", raw.track_number);
    quoted(&mut out, "Genre", &raw.genre);
    out.push_str("\n## Hints from file path\n");
    quoted(&mut out, "Path artist", &hints.artist);
    quoted(&mut out, "Path album", &hints.album);
    plain(&mut out, "Path year", hints.year);
    plain(&mut out, "Path track number", hints.track_number);
    quoted(&mut out, "Path title", &hints.title);
    if !similar_artists.is_empty() {
        out.push_str("\n## Existing artists in database (similar matches)\n");
        for a in similar_artists {
            out.push_str(&format!("- \"{}\" (similarity: {:.2})\n", a.name, a.similarity));
        }
    }
    if !similar_albums.is_empty() {
        out.push_str("\n## Existing albums in database (similar matches)\n");
        for a in similar_albums {
            let year_str = a.year.map(|y| format!(", year: {}", y)).unwrap_or_default();
            out.push_str(&format!("- \"{}\" (similarity: {:.2}{})\n", a.name, a.similarity, year_str));
        }
    }
    out
}
/// Request body for Ollama's `/api/chat` endpoint.
#[derive(Serialize)]
struct OllamaRequest {
    model: String,
    messages: Vec<OllamaMessage>,
    /// Set to "json" to force the model to emit valid JSON output.
    format: String,
    /// false: receive one complete response instead of a token stream.
    stream: bool,
    options: OllamaOptions,
}
/// One chat message; `role` is "system" or "user" in this module.
#[derive(Serialize)]
struct OllamaMessage {
    role: String,
    content: String,
}
/// Sampling options forwarded to the model.
#[derive(Serialize)]
struct OllamaOptions {
    temperature: f64,
}
/// Minimal view of Ollama's chat response; other fields are ignored by serde.
#[derive(Deserialize)]
struct OllamaResponse {
    message: OllamaResponseMessage,
}
/// The assistant message inside an Ollama chat response.
#[derive(Deserialize)]
struct OllamaResponseMessage {
    content: String,
}
async fn call_ollama(
base_url: &str,
model: &str,
system_prompt: &str,
user_message: &str,
) -> anyhow::Result<String> {
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(120))
.build()?;
let request = OllamaRequest {
model: model.to_owned(),
messages: vec![
OllamaMessage {
role: "system".to_owned(),
content: system_prompt.to_owned(),
},
OllamaMessage {
role: "user".to_owned(),
content: user_message.to_owned(),
},
],
format: "json".to_owned(),
stream: false,
options: OllamaOptions { temperature: 0.1 },
};
let url = format!("{}/api/chat", base_url.trim_end_matches('/'));
tracing::info!(%url, model, prompt_len = user_message.len(), "Calling Ollama API...");
let start = std::time::Instant::now();
let resp = client.post(&url).json(&request).send().await?;
let elapsed = start.elapsed();
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
tracing::error!(%status, body = &body[..body.len().min(500)], "Ollama API error");
anyhow::bail!("Ollama returned {}: {}", status, body);
}
let ollama_resp: OllamaResponse = resp.json().await?;
tracing::info!(
elapsed_ms = elapsed.as_millis() as u64,
response_len = ollama_resp.message.content.len(),
"Ollama response received"
);
tracing::debug!(raw_response = %ollama_resp.message.content, "LLM raw output");
Ok(ollama_resp.message.content)
}
/// Parse the LLM JSON response into NormalizedFields.
/// Handles both clean JSON and JSON wrapped in markdown code fences.
fn parse_response(response: &str) -> anyhow::Result<NormalizedFields> {
    /// Shape of the JSON object the LLM is instructed to emit.
    #[derive(Deserialize)]
    struct LlmOutput {
        artist: Option<String>,
        album: Option<String>,
        title: Option<String>,
        year: Option<i32>,
        track_number: Option<i32>,
        genre: Option<String>,
        #[serde(default)]
        featured_artists: Vec<String>,
        confidence: Option<f64>,
        notes: Option<String>,
    }
    let trimmed = response.trim();
    // When the model wraps its answer in ``` fences, carve out the span
    // between the first '{' and the last '}'; otherwise use the text as-is.
    let json_str = if trimmed.starts_with("```") {
        let open = trimmed.find('{').unwrap_or(0);
        let close = trimmed.rfind('}').map_or(trimmed.len(), |i| i + 1);
        &trimmed[open..close]
    } else {
        trimmed
    };
    let parsed: LlmOutput = serde_json::from_str(json_str)
        .map_err(|e| anyhow::anyhow!("Failed to parse LLM response as JSON: {} — raw: {}", e, response))?;
    Ok(NormalizedFields {
        title: parsed.title,
        artist: parsed.artist,
        album: parsed.album,
        year: parsed.year,
        track_number: parsed.track_number,
        genre: parsed.genre,
        featured_artists: parsed.featured_artists,
        confidence: parsed.confidence,
        notes: parsed.notes,
    })
}

View File

@@ -0,0 +1,203 @@
use std::path::Path;
use crate::db::PathHints;
/// Parse metadata hints from the file path relative to the inbox directory.
///
/// Recognized patterns:
///   Artist/Album/01 - Title.ext
///   Artist/Album (Year)/01 - Title.ext
///   Artist/(Year) Album/01 - Title.ext
///   Artist/Album [Year]/01 - Title.ext
///   01 - Title.ext (flat, no artist/album)
pub fn parse(relative_path: &Path) -> PathHints {
    let parts: Vec<&str> = relative_path
        .components()
        .filter_map(|c| c.as_os_str().to_str())
        .collect();
    let mut hints = PathHints::default();
    // Track number / title always come from the filename stem, whatever the depth.
    let leaf = parts.last().copied().unwrap_or("");
    let stem = Path::new(leaf)
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("");
    parse_filename(stem, &mut hints);
    if parts.len() >= 3 {
        // Artist/Album/file.ext — the first two components name artist and album.
        hints.artist = Some(parts[0].to_owned());
        let (album, year) = parse_album_with_year(parts[1]);
        hints.album = Some(album);
        if year.is_some() {
            hints.year = year;
        }
    } else if parts.len() == 2 {
        // Album/file.ext (or Artist/file.ext — ambiguous, treat as album).
        let (album, year) = parse_album_with_year(parts[0]);
        hints.album = Some(album);
        if year.is_some() {
            hints.year = year;
        }
    }
    // A bare file contributes nothing beyond the filename hints.
    hints
}
/// Try to extract track number and title from a filename stem.
///
/// Patterns: "01 - Title", "01. Title", "1 Title", "Title"
fn parse_filename(stem: &str, hints: &mut PathHints) {
    let stem = stem.trim();
    // A leading "NN" with a recognized separator yields both number and title.
    if let Some((num_str, title)) = try_strip_track_prefix(stem) {
        if let Ok(num) = num_str.parse::<i32>() {
            hints.track_number = Some(num);
            if !title.is_empty() {
                hints.title = Some(title.to_owned());
            }
            return;
        }
    }
    // Otherwise the whole (non-empty) stem is the title.
    if !stem.is_empty() {
        hints.title = Some(stem.to_owned());
    }
}
/// Try to parse "NN - Rest", "NN. Rest", "NN Rest", or a bare short "NN"
/// from a string. Returns (number_str, rest) if successful.
fn try_strip_track_prefix(s: &str) -> Option<(&str, &str)> {
    // Locate the end of the leading digit run (s.len() if all digits).
    let digit_end = s
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(s.len());
    if digit_end == 0 {
        return None;
    }
    let num_str = &s[..digit_end];
    if digit_end == s.len() {
        // Whole stem is digits, e.g. "07.flac". Treat short runs as a bare
        // track number with no title; longer runs ("1990") are more likely
        // a numeric song title, so leave them for the title fallback.
        return if num_str.len() <= 3 { Some((num_str, "")) } else { None };
    }
    let rest = s[digit_end..].trim_start();
    // Accept separators "- ", ". ", or "."; otherwise "01 Title" style where
    // the digits are simply followed by the title text. (The original code
    // checked "- " twice; the duplicate branch was dead and is removed.)
    let title = rest
        .strip_prefix("- ")
        .or_else(|| rest.strip_prefix(". "))
        .or_else(|| rest.strip_prefix('.'))
        .map(str::trim)
        .unwrap_or(rest);
    Some((num_str, title))
}
/// Public wrapper for cover image processing.
///
/// Exposes [`parse_album_with_year`] outside this module without making the
/// internal helper itself public.
pub fn parse_album_year_public(dir: &str) -> (String, Option<i32>) {
    parse_album_with_year(dir)
}
/// Extract album name and optional year from directory name.
///
/// Patterns: "Album (2001)", "(2001) Album", "Album [2001]", "Album".
/// Only values in 1900..=2100 are accepted as years; any other bracketed
/// content is kept as part of the album name.
fn parse_album_with_year(dir: &str) -> (String, Option<i32>) {
    // Try "Album (YYYY)" or "Album [YYYY]" (also matches a leading "(YYYY)").
    for (open, close) in [('(', ')'), ('[', ']')] {
        if let Some(start) = dir.rfind(open) {
            if let Some(end) = dir[start..].find(close) {
                let inside = &dir[start + 1..start + end];
                if let Ok(year) = inside.trim().parse::<i32>() {
                    if (1900..=2100).contains(&year) {
                        // Join the text before and after the bracket with a
                        // space so "Album (1999) Deluxe" becomes
                        // "Album Deluxe", not "AlbumDeluxe".
                        let before = dir[..start].trim();
                        let after = dir[start + end + 1..].trim();
                        let album = if before.is_empty() || after.is_empty() {
                            format!("{}{}", before, after)
                        } else {
                            format!("{} {}", before, after)
                        };
                        return (album, Some(year));
                    }
                }
            }
        }
    }
    // Try "(YYYY) Album" where the year parens are not the last bracket pair.
    if dir.starts_with('(') {
        if let Some(end) = dir.find(')') {
            let inside = &dir[1..end];
            if let Ok(year) = inside.trim().parse::<i32>() {
                if (1900..=2100).contains(&year) {
                    let album = dir[end + 1..].trim().to_owned();
                    return (album, Some(year));
                }
            }
        }
    }
    (dir.to_owned(), None)
}
// Unit tests for the path-hint parser: each test pins one recognized
// directory/filename layout end to end through `parse`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    // Full Artist/Album (Year)/NN - Title layout.
    #[test]
    fn test_artist_album_track() {
        let p = PathBuf::from("Pink Floyd/Wish You Were Here (1975)/03 - Have a Cigar.flac");
        let h = parse(&p);
        assert_eq!(h.artist.as_deref(), Some("Pink Floyd"));
        assert_eq!(h.album.as_deref(), Some("Wish You Were Here"));
        assert_eq!(h.year, Some(1975));
        assert_eq!(h.track_number, Some(3));
        assert_eq!(h.title.as_deref(), Some("Have a Cigar"));
    }
    // Year prefixed to the album directory, "NN. Title" filename.
    #[test]
    fn test_year_prefix() {
        let p = PathBuf::from("Artist/(2020) Album Name/01. Song.flac");
        let h = parse(&p);
        assert_eq!(h.artist.as_deref(), Some("Artist"));
        assert_eq!(h.album.as_deref(), Some("Album Name"));
        assert_eq!(h.year, Some(2020));
        assert_eq!(h.track_number, Some(1));
        assert_eq!(h.title.as_deref(), Some("Song"));
    }
    // Flat file: no artist/album hints, only track/title from the filename.
    #[test]
    fn test_flat_file() {
        let p = PathBuf::from("05 - Something.mp3");
        let h = parse(&p);
        assert_eq!(h.artist, None);
        assert_eq!(h.album, None);
        assert_eq!(h.track_number, Some(5));
        assert_eq!(h.title.as_deref(), Some("Something"));
    }
    // Filename with no leading digits: the whole stem becomes the title.
    #[test]
    fn test_no_track_number() {
        let p = PathBuf::from("Artist/Album/Song Name.flac");
        let h = parse(&p);
        assert_eq!(h.track_number, None);
        assert_eq!(h.title.as_deref(), Some("Song Name"));
    }
    // "[YYYY]" is accepted as well as "(YYYY)".
    #[test]
    fn test_square_bracket_year() {
        let p = PathBuf::from("Band/Album [1999]/track.flac");
        let h = parse(&p);
        assert_eq!(h.album.as_deref(), Some("Album"));
        assert_eq!(h.year, Some(1999));
    }
}

58
furumi-agent/src/main.rs Normal file
View File

@@ -0,0 +1,58 @@
mod config;
mod db;
mod ingest;
mod web;
use std::sync::Arc;
use clap::Parser;
/// Entry point: parse CLI config, connect to the database, spawn the ingest
/// pipeline, and serve the admin web UI until shutdown.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    tracing_subscriber::fmt::init();
    let args = config::Args::parse();
    args.validate()?;
    // Prefer a build-time override (e.g. CI-injected version) over the crate version.
    let version = option_env!("FURUMI_VERSION").unwrap_or(env!("CARGO_PKG_VERSION"));
    tracing::info!("Furumi Agent v{} starting", version);
    tracing::info!("Inbox directory: {:?}", args.inbox_dir);
    tracing::info!("Storage directory: {:?}", args.storage_dir);
    tracing::info!("Ollama: {} (model: {})", args.ollama_url, args.ollama_model);
    tracing::info!("Confidence threshold: {}", args.confidence_threshold);
    let system_prompt = args.load_system_prompt()?;
    tracing::info!("System prompt loaded: {} chars", system_prompt.len());
    tracing::info!("Connecting to database...");
    let pool = db::connect(&args.database_url).await?;
    tracing::info!("Running database migrations...");
    db::migrate(&pool).await?;
    tracing::info!("Database ready");
    let state = Arc::new(web::AppState {
        pool: pool.clone(),
        config: Arc::new(args),
        system_prompt: Arc::new(system_prompt),
    });
    // Spawn the ingest pipeline as a background task; it runs for the
    // lifetime of the process, so the JoinHandle is intentionally dropped.
    let ingest_state = state.clone();
    tokio::spawn(async move {
        ingest::run(ingest_state).await;
    });
    // Validate the bind address by returning the error through main's Result
    // instead of eprintln! + process::exit, which skipped destructors and
    // bypassed the single error-reporting path.
    let bind_addr: std::net::SocketAddr = state
        .config
        .bind
        .parse()
        .map_err(|e| format!("Invalid bind address '{}': {}", state.config.bind, e))?;
    tracing::info!("Admin UI: http://{}", bind_addr);
    let app = web::build_router(state);
    let listener = tokio::net::TcpListener::bind(bind_addr).await?;
    axum::serve(listener, app).await?;
    Ok(())
}

View File

@@ -0,0 +1,621 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Furumi Agent — Admin</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
:root {
--bg-base: #0a0c12;
--bg-panel: #111520;
--bg-card: #161d2e;
--bg-hover: #1e2740;
--bg-active: #252f4a;
--border: #1f2c45;
--accent: #7c6af7;
--accent-dim: #5a4fcf;
--text: #e2e8f0;
--text-muted: #64748b;
--text-dim: #94a3b8;
--success: #34d399;
--danger: #f87171;
--warning: #fbbf24;
}
html, body { height: 100%; overflow: hidden; }
body {
font-family: 'Inter', sans-serif;
background: var(--bg-base);
color: var(--text);
display: flex;
flex-direction: column;
}
header {
background: var(--bg-panel);
border-bottom: 1px solid var(--border);
padding: 12px 24px;
display: flex;
align-items: center;
gap: 24px;
}
header h1 {
font-size: 16px;
font-weight: 600;
}
.stats {
display: flex;
gap: 16px;
margin-left: auto;
font-size: 13px;
color: var(--text-dim);
}
.stats .stat { display: flex; gap: 4px; align-items: center; }
.stats .stat-value { color: var(--text); font-weight: 600; }
nav {
display: flex;
gap: 4px;
}
nav button {
background: none;
border: none;
color: var(--text-muted);
padding: 6px 12px;
border-radius: 6px;
cursor: pointer;
font-size: 13px;
font-family: inherit;
}
nav button:hover { background: var(--bg-hover); color: var(--text); }
nav button.active { background: var(--bg-active); color: var(--accent); }
main {
flex: 1;
overflow-y: auto;
padding: 16px 24px;
}
table {
width: 100%;
border-collapse: collapse;
font-size: 13px;
}
th {
text-align: left;
padding: 8px 12px;
color: var(--text-muted);
font-weight: 500;
border-bottom: 1px solid var(--border);
position: sticky;
top: 0;
background: var(--bg-base);
}
td {
padding: 8px 12px;
border-bottom: 1px solid var(--border);
max-width: 200px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
tr:hover td { background: var(--bg-hover); }
.status {
padding: 2px 8px;
border-radius: 4px;
font-size: 11px;
font-weight: 600;
text-transform: uppercase;
}
.status-pending { background: #1e293b; color: var(--text-muted); }
.status-processing { background: #1e1b4b; color: var(--accent); }
.status-review { background: #422006; color: var(--warning); }
.status-approved { background: #052e16; color: var(--success); }
.status-rejected { background: #450a0a; color: var(--danger); }
.status-error { background: #450a0a; color: var(--danger); }
.actions {
display: flex;
gap: 4px;
}
.btn {
border: none;
padding: 4px 10px;
border-radius: 4px;
cursor: pointer;
font-size: 12px;
font-family: inherit;
font-weight: 500;
}
.btn-approve { background: #052e16; color: var(--success); }
.btn-approve:hover { background: #065f46; }
.btn-reject { background: #450a0a; color: var(--danger); }
.btn-reject:hover { background: #7f1d1d; }
.btn-edit { background: var(--bg-active); color: var(--text-dim); }
.btn-edit:hover { background: var(--bg-hover); color: var(--text); }
.empty {
text-align: center;
padding: 48px;
color: var(--text-muted);
font-size: 14px;
}
/* Modal */
.modal-overlay {
display: none;
position: fixed;
inset: 0;
background: rgba(0,0,0,0.7);
z-index: 100;
align-items: center;
justify-content: center;
}
.modal-overlay.visible { display: flex; }
.modal {
background: var(--bg-panel);
border: 1px solid var(--border);
border-radius: 12px;
padding: 24px;
min-width: 400px;
max-width: 600px;
}
.modal h2 { font-size: 16px; margin-bottom: 16px; }
.modal label {
display: block;
font-size: 12px;
color: var(--text-muted);
margin-bottom: 4px;
margin-top: 12px;
}
.modal input, .modal textarea {
width: 100%;
background: var(--bg-card);
border: 1px solid var(--border);
border-radius: 6px;
padding: 8px 10px;
color: var(--text);
font-family: inherit;
font-size: 13px;
}
.modal textarea { resize: vertical; min-height: 60px; }
.modal-actions {
margin-top: 20px;
display: flex;
gap: 8px;
justify-content: flex-end;
}
.modal-actions .btn {
padding: 8px 16px;
}
.btn-primary { background: var(--accent); color: white; }
.btn-primary:hover { background: var(--accent-dim); }
.btn-cancel { background: var(--bg-card); color: var(--text-dim); }
.btn-cancel:hover { background: var(--bg-hover); }
/* Detail fields in modal */
.detail-row {
display: flex;
gap: 12px;
margin-top: 8px;
}
.detail-row .field { flex: 1; }
.raw-value {
font-size: 11px;
color: var(--text-muted);
margin-top: 2px;
}
/* Featured artists tags */
.feat-tags {
display: flex;
flex-wrap: wrap;
gap: 6px;
margin-top: 6px;
min-height: 28px;
}
.feat-tag {
display: flex;
align-items: center;
gap: 4px;
background: var(--bg-active);
border: 1px solid var(--border);
border-radius: 4px;
padding: 2px 8px;
font-size: 12px;
}
.feat-tag .remove {
cursor: pointer;
color: var(--text-muted);
font-size: 14px;
line-height: 1;
}
.feat-tag .remove:hover { color: var(--danger); }
/* Artist search dropdown */
.artist-search-wrap {
position: relative;
margin-top: 6px;
}
.artist-search-wrap input {
width: 100%;
}
.artist-dropdown {
position: absolute;
top: 100%;
left: 0;
right: 0;
background: var(--bg-card);
border: 1px solid var(--border);
border-radius: 0 0 6px 6px;
max-height: 160px;
overflow-y: auto;
z-index: 10;
display: none;
}
.artist-dropdown.open { display: block; }
.artist-option {
padding: 6px 10px;
cursor: pointer;
font-size: 13px;
display: flex;
justify-content: space-between;
}
.artist-option:hover { background: var(--bg-hover); }
.artist-option .sim {
color: var(--text-muted);
font-size: 11px;
}
</style>
</head>
<body>
<header>
<h1>Furumi Agent</h1>
<nav>
<button class="active" onclick="showTab('queue')">Queue</button>
<button onclick="showTab('artists')">Artists</button>
</nav>
<div class="stats" id="statsBar"></div>
</header>
<main id="content"></main>
<div class="modal-overlay" id="modalOverlay" onclick="if(event.target===this)closeModal()">
<div class="modal" id="modal"></div>
</div>
<script>
const API = '/api';
let currentTab = 'queue';
let currentFilter = null;
// Thin fetch wrapper: returns parsed JSON, or null for 204 / empty /
// unparseable bodies (errors are logged, never thrown to callers).
async function api(path, opts) {
    const resp = await fetch(API + path, opts);
    if (resp.status === 204) return null;
    const raw = await resp.text();
    if (!raw) return null;
    try {
        return JSON.parse(raw);
    } catch (err) {
        console.error('API parse error:', resp.status, raw);
        return null;
    }
}
// Refresh the header stats bar from /api/stats.
async function loadStats() {
    const s = await api('/stats');
    // api() returns null on network/parse failure — keep the previous
    // numbers on screen instead of throwing while rendering.
    if (!s) return;
    document.getElementById('statsBar').innerHTML = `
        <div class="stat">Tracks: <span class="stat-value">${s.total_tracks}</span></div>
        <div class="stat">Artists: <span class="stat-value">${s.total_artists}</span></div>
        <div class="stat">Albums: <span class="stat-value">${s.total_albums}</span></div>
        <div class="stat">Pending: <span class="stat-value">${s.pending_count}</span></div>
        <div class="stat">Review: <span class="stat-value">${s.review_count}</span></div>
        <div class="stat">Errors: <span class="stat-value">${s.error_count}</span></div>
    `;
}
// Switch between the Queue and Artists tabs.
function showTab(tab) {
    currentTab = tab;
    // Derive the active button from the tab name instead of the implicit
    // global `event` (non-standard, and broken if showTab is ever called
    // programmatically rather than from an inline onclick handler).
    document.querySelectorAll('nav button').forEach(b => {
        b.classList.toggle('active', b.textContent.trim().toLowerCase() === tab);
    });
    if (tab === 'queue') loadQueue();
    else if (tab === 'artists') loadArtists();
}
// Render the queue table, optionally filtered to one status.
async function loadQueue(status) {
    currentFilter = status;
    const qs = status ? `?status=${status}` : '';
    const items = await api(`/queue${qs}`);
    const el = document.getElementById('content');
    // api() yields null on failure; treat that like an empty queue instead
    // of throwing on `.length` of null.
    if (!items || !items.length) {
        el.innerHTML = '<div class="empty">No items in queue</div>';
        return;
    }
    let html = `
        <div style="margin-bottom:12px;display:flex;gap:4px">
            <button class="btn ${!status?'btn-primary':'btn-edit'}" onclick="loadQueue()">All</button>
            <button class="btn ${status==='review'?'btn-primary':'btn-edit'}" onclick="loadQueue('review')">Review</button>
            <button class="btn ${status==='pending'?'btn-primary':'btn-edit'}" onclick="loadQueue('pending')">Pending</button>
            <button class="btn ${status==='approved'?'btn-primary':'btn-edit'}" onclick="loadQueue('approved')">Approved</button>
            <button class="btn ${status==='error'?'btn-primary':'btn-edit'}" onclick="loadQueue('error')">Errors</button>
        </div>
        <table>
            <tr><th>Status</th><th>Raw Artist</th><th>Raw Title</th><th>Norm Artist</th><th>Norm Title</th><th>Norm Album</th><th>Conf</th><th>Actions</th></tr>
    `;
    for (const it of items) {
        const conf = it.confidence != null ? it.confidence.toFixed(2) : '-';
        html += `<tr>
            <td><span class="status status-${it.status}">${it.status}</span></td>
            <td title="${esc(it.raw_artist)}">${esc(it.raw_artist || '-')}</td>
            <td title="${esc(it.raw_title)}">${esc(it.raw_title || '-')}</td>
            <td title="${esc(it.norm_artist)}">${esc(it.norm_artist || '-')}</td>
            <td title="${esc(it.norm_title)}">${esc(it.norm_title || '-')}</td>
            <td title="${esc(it.norm_album)}">${esc(it.norm_album || '-')}</td>
            <td>${conf}</td>
            <td class="actions">
                ${it.status === 'review' ? `<button class="btn btn-approve" onclick="approveItem('${it.id}')">Approve</button>` : ''}
                ${it.status === 'review' ? `<button class="btn btn-reject" onclick="rejectItem('${it.id}')">Reject</button>` : ''}
                <button class="btn btn-edit" onclick="editItem('${it.id}')">Edit</button>
            </td>
        </tr>`;
    }
    html += '</table>';
    el.innerHTML = html;
}
// Render the artists table with a Rename action per row.
async function loadArtists() {
    const artists = await api('/artists');
    const el = document.getElementById('content');
    // Guard against a failed fetch (null) as well as an empty list.
    if (!artists || !artists.length) {
        el.innerHTML = '<div class="empty">No artists yet</div>';
        return;
    }
    let html = '<table><tr><th>ID</th><th>Name</th><th>Actions</th></tr>';
    for (const a of artists) {
        // Pass the name via a data attribute: interpolating it into the
        // onclick string broke for names containing an apostrophe, because
        // esc() emits &#39; which the HTML parser decodes back to ' before
        // the JS expression is parsed.
        html += `<tr>
            <td>${a.id}</td>
            <td>${esc(a.name)}</td>
            <td class="actions">
                <button class="btn btn-edit" data-name="${esc(a.name)}" onclick="editArtist(${a.id}, this.dataset.name)">Rename</button>
            </td>
        </tr>`;
    }
    html += '</table>';
    el.innerHTML = html;
}
// Approve a queue item, then refresh the header stats and the current list.
function approveItem(id) {
    return api(`/queue/${id}/approve`, { method: 'POST' }).then(() => {
        loadStats();
        loadQueue(currentFilter);
    });
}
// Reject a queue item, then refresh the header stats and the current list.
function rejectItem(id) {
    return api(`/queue/${id}/reject`, { method: 'POST' }).then(() => {
        loadStats();
        loadQueue(currentFilter);
    });
}
// Working copy of the featured-artist names for the currently open editor modal.
let editFeatured = [];
// Debounce timer handle for the artist-search box.
let searchTimer = null;
// Open the edit modal for a queue item, prefilling normalized values and
// falling back to the raw tag values where normalization is missing.
async function editItem(id) {
    const item = await api(`/queue/${id}`);
    if (!item) return;
    // Parse featured artists from JSON string
    editFeatured = [];
    if (item.norm_featured_artists) {
        // Stored as a JSON array string; malformed data is silently ignored.
        try { editFeatured = JSON.parse(item.norm_featured_artists); } catch(e) {}
    }
    document.getElementById('modal').innerHTML = `
        <h2>Edit Metadata</h2>
        <div class="detail-row">
            <div class="field">
                <label>Artist</label>
                <input id="ed-artist" value="${esc(item.norm_artist || item.raw_artist || '')}">
                <div class="raw-value">Raw: ${esc(item.raw_artist || '-')} | Path: ${esc(item.path_artist || '-')}</div>
            </div>
        </div>
        <div class="detail-row">
            <div class="field">
                <label>Title</label>
                <input id="ed-title" value="${esc(item.norm_title || item.raw_title || '')}">
                <div class="raw-value">Raw: ${esc(item.raw_title || '-')} | Path: ${esc(item.path_title || '-')}</div>
            </div>
        </div>
        <div class="detail-row">
            <div class="field">
                <label>Album</label>
                <input id="ed-album" value="${esc(item.norm_album || item.raw_album || '')}">
                <div class="raw-value">Raw: ${esc(item.raw_album || '-')} | Path: ${esc(item.path_album || '-')}</div>
            </div>
            <div class="field">
                <label>Year</label>
                <input id="ed-year" type="number" value="${item.norm_year || item.raw_year || ''}">
            </div>
        </div>
        <div class="detail-row">
            <div class="field">
                <label>Track #</label>
                <input id="ed-track" type="number" value="${item.norm_track_number || item.raw_track_number || ''}">
            </div>
            <div class="field">
                <label>Genre</label>
                <input id="ed-genre" value="${esc(item.norm_genre || item.raw_genre || '')}">
            </div>
        </div>
        <label>Featured Artists</label>
        <div class="feat-tags" id="feat-tags"></div>
        <div class="artist-search-wrap">
            <input id="feat-search" placeholder="Search artist to add..." autocomplete="off"
                oninput="onFeatSearch(this.value)" onkeydown="onFeatKey(event)">
            <div class="artist-dropdown" id="feat-dropdown"></div>
        </div>
        ${item.llm_notes ? `<label>Agent Notes</label><div class="raw-value" style="margin-bottom:8px">${esc(item.llm_notes)}</div>` : ''}
        ${item.error_message ? `<label>Error</label><div class="raw-value" style="color:var(--danger)">${esc(item.error_message)}</div>` : ''}
        <div class="modal-actions">
            <button class="btn btn-cancel" onclick="closeModal()">Cancel</button>
            <button class="btn btn-primary" onclick="saveEdit('${item.id}')">Save</button>
        </div>
    `;
    // The tag chips live in the markup above, so render them after insertion.
    renderFeatTags();
    openModal();
}
// Re-render the featured-artist tag chips from the editFeatured array.
function renderFeatTags() {
    const container = document.getElementById('feat-tags');
    if (!container) return;
    const chips = editFeatured.map((name, i) =>
        `<span class="feat-tag">${esc(name)}<span class="remove" onclick="removeFeat(${i})">&times;</span></span>`
    );
    container.innerHTML = chips.join('');
}
// Drop the tag at index idx and refresh the chips.
function removeFeat(idx) {
    editFeatured.splice(idx, 1);
    renderFeatTags();
}
// Add a trimmed, deduplicated artist name, then reset the search box.
function addFeat(name) {
    const trimmed = name.trim();
    if (!trimmed || editFeatured.includes(trimmed)) return;
    editFeatured.push(trimmed);
    renderFeatTags();
    const input = document.getElementById('feat-search');
    if (input) input.value = '';
    closeFeatDropdown();
}
// Debounced artist search for the featured-artists field; renders matches
// (plus an "add as new" fallback) into the dropdown.
function onFeatSearch(q) {
    clearTimeout(searchTimer);
    if (q.length < 2) { closeFeatDropdown(); return; }
    searchTimer = setTimeout(async () => {
        const results = await api(`/artists/search?q=${encodeURIComponent(q)}&limit=8`);
        const dd = document.getElementById('feat-dropdown');
        // Names are passed via a data attribute rather than interpolated into
        // the onclick JS string: esc()'s &#39; is decoded back to a real
        // apostrophe by the HTML parser, which used to break the JS string
        // literal for any name or query containing '.
        const option = (name, label) =>
            `<div class="artist-option" data-name="${esc(name)}" onclick="addFeat(this.dataset.name)">
                ${label}
            </div>`;
        if (!results || !results.length) {
            // No matches: offer to add the typed query as a new artist.
            dd.innerHTML = option(q, `Add "${esc(q)}" as new`);
            dd.classList.add('open');
            return;
        }
        let html = '';
        for (const a of results) {
            html += option(a.name, esc(a.name));
        }
        // Always offer to add typed value as-is when it isn't an exact match.
        const typed = document.getElementById('feat-search').value.trim();
        if (typed && !results.find(a => a.name.toLowerCase() === typed.toLowerCase())) {
            html += option(typed, `Add "${esc(typed)}" as new`);
        }
        dd.innerHTML = html;
        dd.classList.add('open');
    }, 250);
}
// Keyboard handling for the artist search box: Enter adds the typed name,
// Escape dismisses the suggestion dropdown.
function onFeatKey(e) {
    switch (e.key) {
        case 'Enter': {
            e.preventDefault();
            const typed = e.target.value.trim();
            if (typed) addFeat(typed);
            break;
        }
        case 'Escape':
            closeFeatDropdown();
            break;
    }
}
// Hide the artist suggestion dropdown if it is present in the DOM.
function closeFeatDropdown() {
    const dd = document.getElementById('feat-dropdown');
    if (dd) dd.classList.remove('open');
}
// Persist the modal's edited fields for queue item `id`, then refresh the UI.
async function saveEdit(id) {
    const body = {
        norm_artist: document.getElementById('ed-artist').value || null,
        norm_title: document.getElementById('ed-title').value || null,
        norm_album: document.getElementById('ed-album').value || null,
        // parseInt yields NaN for empty input; `|| null` normalizes that
        // (and 0, which is not a meaningful year/track) to null.
        norm_year: parseInt(document.getElementById('ed-year').value) || null,
        norm_track_number: parseInt(document.getElementById('ed-track').value) || null,
        norm_genre: document.getElementById('ed-genre').value || null,
        featured_artists: editFeatured,
    };
    await api(`/queue/${id}/update`, {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
    });
    closeModal();
    // Refresh the header counters too, matching approve/reject behavior.
    loadStats();
    loadQueue(currentFilter);
}
// Prompt for a new artist name and PUT it to the server, then reload the list.
async function editArtist(id, currentName) {
    const name = prompt('New artist name:', currentName);
    if (!name || name === currentName) return;
    const opts = {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ name }),
    };
    await api(`/artists/${id}`, opts);
    loadArtists();
}
// Show / hide the edit-modal overlay.
function openModal() { document.getElementById('modalOverlay').classList.add('visible'); }
function closeModal() { document.getElementById('modalOverlay').classList.remove('visible'); }
// HTML-escape a value for safe interpolation into markup (null/undefined -> '').
function esc(s) {
    if (s == null) return '';
    return String(s)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}
// Init
// Initial render: header stats plus the queue tab; stats refresh every 10s.
loadStats();
loadQueue();
setInterval(loadStats, 10000);
</script>
</body>
</html>

236
furumi-agent/src/web/api.rs Normal file
View File

@@ -0,0 +1,236 @@
use std::sync::Arc;
use axum::{
extract::{Path, Query, State},
http::StatusCode,
response::{IntoResponse, Json},
};
use serde::Deserialize;
use uuid::Uuid;
use crate::db;
use super::AppState;
type S = Arc<AppState>;
// --- Stats ---
/// GET /api/stats — library-wide counters for the admin header bar.
pub async fn stats(State(state): State<S>) -> impl IntoResponse {
    let result = db::get_stats(&state.pool).await;
    match result {
        Ok(s) => (StatusCode::OK, Json(serde_json::to_value(s).unwrap())).into_response(),
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
// --- Queue ---
/// Query parameters accepted by GET /api/queue.
#[derive(Deserialize)]
pub struct QueueQuery {
    /// Optional status filter (e.g. "pending", "review", "approved").
    #[serde(default)]
    pub status: Option<String>,
    /// Page size; defaults to 50 when absent.
    #[serde(default = "default_limit")]
    pub limit: i64,
    /// Pagination offset; defaults to 0 when absent.
    #[serde(default)]
    pub offset: i64,
}
/// serde default for [`QueueQuery::limit`].
fn default_limit() -> i64 {
    50
}
/// GET /api/queue — list pending-queue items, optionally filtered by status.
pub async fn list_queue(State(state): State<S>, Query(q): Query<QueueQuery>) -> impl IntoResponse {
    let result = db::list_pending(&state.pool, q.status.as_deref(), q.limit, q.offset).await;
    match result {
        Ok(list) => (StatusCode::OK, Json(serde_json::to_value(list).unwrap())).into_response(),
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
/// GET /api/queue/:id — fetch one pending-queue item by id.
pub async fn get_queue_item(State(state): State<S>, Path(id): Path<Uuid>) -> impl IntoResponse {
    let result = db::get_pending(&state.pool, id).await;
    match result {
        Ok(Some(item)) => (StatusCode::OK, Json(serde_json::to_value(item).unwrap())).into_response(),
        Ok(None) => error_response(StatusCode::NOT_FOUND, "not found"),
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
/// DELETE /api/queue/:id — remove one pending-queue item.
pub async fn delete_queue_item(State(state): State<S>, Path(id): Path<Uuid>) -> impl IntoResponse {
    let result = db::delete_pending(&state.pool, id).await;
    match result {
        Ok(true) => StatusCode::NO_CONTENT.into_response(),
        Ok(false) => error_response(StatusCode::NOT_FOUND, "not found"),
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
/// POST /api/queue/:id/approve — move the inbox file into permanent storage
/// and finalize the normalized metadata in the database.
///
/// The file is moved BEFORE the DB finalize; if the finalize fails the file
/// has already been relocated. NOTE(review): there is no rollback of the
/// move on DB failure — confirm whether that is acceptable.
pub async fn approve_queue_item(State(state): State<S>, Path(id): Path<Uuid>) -> impl IntoResponse {
    // Get pending track, move file, finalize in DB
    let pt = match db::get_pending(&state.pool, id).await {
        Ok(Some(pt)) => pt,
        Ok(None) => return error_response(StatusCode::NOT_FOUND, "not found"),
        Err(e) => return error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    };
    // Missing normalized fields fall back to placeholder names so the file
    // still lands in a predictable location.
    let artist = pt.norm_artist.as_deref().unwrap_or("Unknown Artist");
    let album = pt.norm_album.as_deref().unwrap_or("Unknown Album");
    let title = pt.norm_title.as_deref().unwrap_or("Unknown Title");
    let source = std::path::Path::new(&pt.inbox_path);
    // Keep the original extension; default to "flac" if the path has none.
    let ext = source.extension().and_then(|e| e.to_str()).unwrap_or("flac");
    // Track number 0 (or None) means "no number" and omits the "NN - " prefix.
    let track_num = pt.norm_track_number.unwrap_or(0);
    let filename = if track_num > 0 {
        format!("{:02} - {}.{}", track_num, sanitize_filename(title), ext)
    } else {
        format!("{}.{}", sanitize_filename(title), ext)
    };
    match crate::ingest::mover::move_to_storage(
        &state.config.storage_dir,
        artist,
        album,
        &filename,
        source,
    )
    .await
    {
        Ok(storage_path) => {
            // NOTE(review): this stores the full storage path despite the
            // `rel_path` name — confirm what approve_and_finalize expects.
            let rel_path = storage_path.to_string_lossy().to_string();
            match db::approve_and_finalize(&state.pool, id, &rel_path).await {
                Ok(track_id) => (StatusCode::OK, Json(serde_json::json!({"track_id": track_id}))).into_response(),
                Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
            }
        }
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
/// POST /api/queue/:id/reject — mark a pending-queue item as rejected.
pub async fn reject_queue_item(State(state): State<S>, Path(id): Path<Uuid>) -> impl IntoResponse {
    let result = db::update_pending_status(&state.pool, id, "rejected", None).await;
    match result {
        Ok(()) => StatusCode::NO_CONTENT.into_response(),
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
/// JSON body for PUT /api/queue/:id/update — the manually edited
/// normalized fields. Absent optional fields clear the value.
#[derive(Deserialize)]
pub struct UpdateQueueItem {
    pub norm_title: Option<String>,
    pub norm_artist: Option<String>,
    pub norm_album: Option<String>,
    pub norm_year: Option<i32>,
    pub norm_track_number: Option<i32>,
    pub norm_genre: Option<String>,
    /// Defaults to an empty list when the field is omitted.
    #[serde(default)]
    pub featured_artists: Vec<String>,
}
/// PUT /api/queue/:id/update — overwrite the normalized fields from a manual
/// edit and put the item back into the "review" state.
pub async fn update_queue_item(
    State(state): State<S>,
    Path(id): Path<Uuid>,
    Json(body): Json<UpdateQueueItem>,
) -> impl IntoResponse {
    let norm = db::NormalizedFields {
        title: body.norm_title,
        artist: body.norm_artist,
        album: body.norm_album,
        year: body.norm_year,
        track_number: body.norm_track_number,
        genre: body.norm_genre,
        featured_artists: body.featured_artists,
        confidence: Some(1.0), // manual edit = full confidence
        notes: Some("Manually edited".to_owned()),
    };
    let result = db::update_pending_normalized(&state.pool, id, "review", &norm, None).await;
    match result {
        Ok(()) => StatusCode::NO_CONTENT.into_response(),
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
// --- Artists ---
#[derive(Deserialize)]
pub struct SearchArtistsQuery {
pub q: String,
#[serde(default = "default_search_limit")]
pub limit: i32,
}
/// Serde default for [`SearchArtistsQuery::limit`].
fn default_search_limit() -> i32 {
    // Cap fuzzy-match results at ten unless the client asks otherwise.
    const DEFAULT_LIMIT: i32 = 10;
    DEFAULT_LIMIT
}
/// GET /artists/search — fuzzy-search artists by name.
///
/// An empty `q` returns an empty JSON array without touching the database.
/// Responds 200 with the matches, or 500 with a JSON error body on DB failure.
pub async fn search_artists(State(state): State<S>, Query(q): Query<SearchArtistsQuery>) -> impl IntoResponse {
    if q.q.is_empty() {
        return (StatusCode::OK, Json(serde_json::json!([]))).into_response();
    }
    match db::find_similar_artists(&state.pool, &q.q, q.limit).await {
        // Let axum's `Json` serialize the rows directly: this drops the
        // panicking `serde_json::to_value(..).unwrap()` round-trip and the
        // intermediate `Value` allocation.
        Ok(artists) => (StatusCode::OK, Json(artists)).into_response(),
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
/// GET /artists — list every artist in the library.
///
/// Responds 200 with the full list, or 500 with a JSON error body on DB failure.
pub async fn list_artists(State(state): State<S>) -> impl IntoResponse {
    match db::list_artists_all(&state.pool).await {
        // Serialize via axum's `Json` directly instead of the panicking
        // `serde_json::to_value(..).unwrap()` detour.
        Ok(artists) => (StatusCode::OK, Json(artists)).into_response(),
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
/// Request body for PUT /artists/:id — rename an artist.
#[derive(Deserialize)]
pub struct UpdateArtistBody {
    /// New display name for the artist.
    pub name: String,
}
/// PUT /artists/:id — rename an artist.
///
/// Responds 204 on success, 404 when no artist row matched `id`, and 500
/// with a JSON error body on DB failure.
pub async fn update_artist(
    State(state): State<S>,
    Path(id): Path<i64>,
    Json(body): Json<UpdateArtistBody>,
) -> impl IntoResponse {
    // `update_artist_name` signals via bool whether any row was updated.
    let updated = match db::update_artist_name(&state.pool, id, &body.name).await {
        Ok(flag) => flag,
        Err(e) => return error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    };
    if updated {
        StatusCode::NO_CONTENT.into_response()
    } else {
        error_response(StatusCode::NOT_FOUND, "not found")
    }
}
// --- Albums ---
/// GET /artists/:id/albums — list an artist's albums.
///
/// Responds 200 with the albums (empty list if the artist has none),
/// or 500 with a JSON error body on DB failure.
pub async fn list_albums(State(state): State<S>, Path(artist_id): Path<i64>) -> impl IntoResponse {
    match db::list_albums_by_artist(&state.pool, artist_id).await {
        // Serialize via axum's `Json` directly instead of the panicking
        // `serde_json::to_value(..).unwrap()` detour.
        Ok(albums) => (StatusCode::OK, Json(albums)).into_response(),
        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    }
}
/// Request body for PUT /albums/:id — rename an album and/or change its year.
#[derive(Deserialize)]
pub struct UpdateAlbumBody {
    /// New album title.
    pub name: String,
    /// New release year; `None` clears / leaves the year unset.
    /// NOTE(review): whether `None` clears or preserves the stored year
    /// depends on `db::update_album` — confirm there.
    pub year: Option<i32>,
}
/// PUT /albums/:id — update an album's name and year.
///
/// Responds 204 on success, 404 when no album row matched `id`, and 500
/// with a JSON error body on DB failure.
pub async fn update_album(
    State(state): State<S>,
    Path(id): Path<i64>,
    Json(body): Json<UpdateAlbumBody>,
) -> impl IntoResponse {
    // `db::update_album` signals via bool whether any row was updated.
    let updated = match db::update_album(&state.pool, id, &body.name, body.year).await {
        Ok(flag) => flag,
        Err(e) => return error_response(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
    };
    if updated {
        StatusCode::NO_CONTENT.into_response()
    } else {
        error_response(StatusCode::NOT_FOUND, "not found")
    }
}
// --- Helpers ---
fn error_response(status: StatusCode, message: &str) -> axum::response::Response {
(status, Json(serde_json::json!({"error": message}))).into_response()
}
/// Make `name` safe to use as a single filename component.
///
/// Surrounding whitespace is trimmed, then every character that is reserved
/// on common filesystems (`/ \ : * ? " < > |`) — and, additionally, any
/// control character (including NUL), which Windows rejects and which is
/// hazardous in shell contexts — is replaced with `_`.
///
/// Trimming happens first so that leading/trailing whitespace is still
/// removed (as before) rather than turned into underscores.
fn sanitize_filename(name: &str) -> String {
    name.trim()
        .chars()
        .map(|c| {
            if matches!(c, '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|') || c.is_control() {
                '_'
            } else {
                c
            }
        })
        .collect()
}

View File

@@ -0,0 +1,39 @@
pub mod api;
use std::sync::Arc;
use axum::{Router, routing::{get, post, put}};
use sqlx::PgPool;
use crate::config::Args;
/// Shared application state handed to every axum handler.
///
/// Cheap to `Clone`: the pool is internally ref-counted and the other
/// fields are behind `Arc`.
#[derive(Clone)]
pub struct AppState {
    /// Postgres connection pool shared by all request handlers.
    pub pool: PgPool,
    /// Parsed runtime configuration, shared immutably.
    pub config: Arc<Args>,
    /// Preloaded system prompt text — presumably fed to the metadata
    /// AI agent; confirm at the call sites that read it.
    pub system_prompt: Arc<String>,
}
/// Assemble the HTTP router: the admin page at `/` and the JSON API
/// nested under `/api`, all sharing `state`.
pub fn build_router(state: Arc<AppState>) -> Router {
    // JSON API endpoints; paths here are relative to the `/api` mount point.
    let api_router = Router::new()
        .route("/stats", get(api::stats))
        .route("/queue", get(api::list_queue))
        .route(
            "/queue/:id",
            get(api::get_queue_item).delete(api::delete_queue_item),
        )
        .route("/queue/:id/approve", post(api::approve_queue_item))
        .route("/queue/:id/reject", post(api::reject_queue_item))
        .route("/queue/:id/update", put(api::update_queue_item))
        .route("/artists/search", get(api::search_artists))
        .route("/artists", get(api::list_artists))
        .route("/artists/:id", put(api::update_artist))
        .route("/artists/:id/albums", get(api::list_albums))
        .route("/albums/:id", put(api::update_album));

    Router::new()
        .route("/", get(admin_html))
        .nest("/api", api_router)
        .with_state(state)
}
/// GET / — serve the admin UI page, embedded into the binary at compile time.
async fn admin_html() -> axum::response::Html<&'static str> {
    // `include_str!` resolves relative to this source file.
    let page: &'static str = include_str!("admin.html");
    axum::response::Html(page)
}