pub mod cover_art; pub mod dto; pub mod metadata; pub mod mover; pub mod normalize; pub mod path_hints; pub mod rag; use serde::Deserialize; // --------------------------------------------------------------------------- // LLM health probe — called from the admin settings page // --------------------------------------------------------------------------- /// Result of probing the LLM API. #[derive(Debug, Default)] pub struct AgentProbeResult { pub ok: bool, pub model_intro: String, pub model_name: String, pub prompt_tokens: Option, pub completion_tokens: Option, pub tokens_per_sec: Option, pub latency_ms: u64, pub error: String, } /// Send a lightweight "introduce yourself" prompt to the LLM and return the /// response together with timing / usage statistics when available. pub async fn probe_llm( llm_url: &str, llm_model: &str, llm_auth: &str, ) -> AgentProbeResult { let start = std::time::Instant::now(); let client = match reqwest::Client::builder() .timeout(std::time::Duration::from_secs(30)) .build() { Ok(c) => c, Err(e) => { return AgentProbeResult { error: format!("failed to create HTTP client: {e}"), ..Default::default() }; } }; let body = serde_json::json!({ "model": llm_model, "messages": [ { "role": "user", "content": "Introduce yourself briefly: what model are you, who made you? Reply in 1–2 sentences." } ], "stream": false, "temperature": 0.3, "max_tokens": 256 }); let url = format!("{}/v1/chat/completions", llm_url.trim_end_matches('/')); let mut req = client.post(&url).json(&body); if !llm_auth.is_empty() { req = req.header("Authorization", llm_auth); } let resp = match req.send().await { Ok(r) => r, Err(e) => { return AgentProbeResult { latency_ms: start.elapsed().as_millis() as u64, error: format!("connection failed: {e}"), ..Default::default() }; } }; let elapsed = start.elapsed(); let latency_ms = elapsed.as_millis() as u64; if !resp.status().is_success() { let status = resp.status(); let body_text = resp.text().await.unwrap_or_default(); return AgentProbeResult { latency_ms, error: format!("HTTP {status}: {}", &body_text[..body_text.len().min(300)]), ..Default::default() }; } #[derive(Deserialize)] struct ProbeResponse { choices: Option>, model: Option, usage: Option, } #[derive(Deserialize)] struct ProbeChoice { message: Option, } #[derive(Deserialize)] struct ProbeMessage { content: Option, } #[derive(Deserialize)] struct ProbeUsage { prompt_tokens: Option, completion_tokens: Option, } let raw: ProbeResponse = match resp.json().await { Ok(r) => r, Err(e) => { return AgentProbeResult { latency_ms, error: format!("failed to parse response: {e}"), ..Default::default() }; } }; let model_intro = raw .choices .as_ref() .and_then(|c| c.first()) .and_then(|c| c.message.as_ref()) .and_then(|m| m.content.clone()) .unwrap_or_default(); let model_name = raw.model.unwrap_or_default(); let prompt_tokens = raw.usage.as_ref().and_then(|u| u.prompt_tokens); let completion_tokens = raw.usage.as_ref().and_then(|u| u.completion_tokens); // Compute tokens/sec from completion tokens and wall time let tokens_per_sec = completion_tokens.map(|ct| { if elapsed.as_secs_f64() > 0.0 { ct as f64 / elapsed.as_secs_f64() } else { 0.0 } }); AgentProbeResult { ok: true, model_intro, model_name, prompt_tokens, completion_tokens, tokens_per_sec, latency_ms, error: String::new(), } }