Fixed OpenAI API endpoint
@@ -113,32 +113,38 @@ fn build_user_message(
 }
 
 #[derive(Serialize)]
-struct OllamaRequest {
+struct ChatRequest {
     model: String,
-    messages: Vec<OllamaMessage>,
-    format: String,
+    messages: Vec<ChatMessage>,
+    response_format: ChatResponseFormat,
     stream: bool,
-    options: OllamaOptions,
+    temperature: f64,
 }
 
 #[derive(Serialize)]
-struct OllamaMessage {
+struct ChatMessage {
     role: String,
     content: String,
 }
 
 #[derive(Serialize)]
-struct OllamaOptions {
-    temperature: f64,
+struct ChatResponseFormat {
+    #[serde(rename = "type")]
+    kind: String,
 }
 
 #[derive(Deserialize)]
-struct OllamaResponse {
-    message: OllamaResponseMessage,
+struct ChatResponse {
+    choices: Vec<ChatChoice>,
 }
 
 #[derive(Deserialize)]
-struct OllamaResponseMessage {
+struct ChatChoice {
+    message: ChatResponseMessage,
+}
+
+#[derive(Deserialize)]
+struct ChatResponseMessage {
     content: String,
 }
 
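A note on the serde derives above (a sketch, not part of the commit): the OpenAI-style response_format object uses the reserved word `type` as a key, and `type` is a Rust keyword, which is why the new ChatResponseFormat struct names the field `kind` and maps it back with `#[serde(rename = "type")]`. A minimal, self-contained check (sample values invented; assumes serde and serde_json as dependencies):

    use serde::Serialize;

    #[derive(Serialize)]
    struct ChatResponseFormat {
        // `type` is a Rust keyword, so the field is named `kind` and
        // renamed on serialization.
        #[serde(rename = "type")]
        kind: String,
    }

    fn main() {
        let fmt = ChatResponseFormat { kind: "json_object".to_owned() };
        // Prints: {"type":"json_object"}
        println!("{}", serde_json::to_string(&fmt).unwrap());
    }
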
@@ -153,25 +159,25 @@ pub async fn call_ollama(
         .timeout(std::time::Duration::from_secs(120))
         .build()?;
 
-    let request = OllamaRequest {
+    let request = ChatRequest {
         model: model.to_owned(),
         messages: vec![
-            OllamaMessage {
+            ChatMessage {
                 role: "system".to_owned(),
                 content: system_prompt.to_owned(),
             },
-            OllamaMessage {
+            ChatMessage {
                 role: "user".to_owned(),
                 content: user_message.to_owned(),
             },
         ],
-        format: "json".to_owned(),
+        response_format: ChatResponseFormat { kind: "json_object".to_owned() },
         stream: false,
-        options: OllamaOptions { temperature: 0.1 },
+        temperature: 0.1,
     };
 
-    let url = format!("{}/api/chat", base_url.trim_end_matches('/'));
-    tracing::info!(%url, model, prompt_len = user_message.len(), "Calling Ollama API...");
+    let url = format!("{}/v1/chat/completions", base_url.trim_end_matches('/'));
+    tracing::info!(%url, model, prompt_len = user_message.len(), "Calling LLM API...");
 
     let start = std::time::Instant::now();
     let mut req = client.post(&url).json(&request);
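Two things change in this hunk: the request body now carries the OpenAI-style response_format and a top-level temperature instead of Ollama's native format: "json" and nested options, and the URL moves from Ollama's /api/chat to the OpenAI-compatible /v1/chat/completions route (which Ollama also serves). The trim_end_matches('/') guard keeps the join tolerant of a configured base URL with or without a trailing slash; a quick sketch (the base URL is just an example, 11434 being Ollama's default port):

    fn main() {
        for base in ["http://localhost:11434", "http://localhost:11434/"] {
            // Both spellings of the base URL yield the same endpoint.
            let url = format!("{}/v1/chat/completions", base.trim_end_matches('/'));
            assert_eq!(url, "http://localhost:11434/v1/chat/completions");
        }
    }
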
@@ -184,18 +190,25 @@ pub async fn call_ollama(
     if !resp.status().is_success() {
         let status = resp.status();
         let body = resp.text().await.unwrap_or_default();
-        tracing::error!(%status, body = &body[..body.len().min(500)], "Ollama API error");
-        anyhow::bail!("Ollama returned {}: {}", status, body);
+        tracing::error!(%status, body = &body[..body.len().min(500)], "LLM API error");
+        anyhow::bail!("LLM returned {}: {}", status, body);
     }
 
-    let ollama_resp: OllamaResponse = resp.json().await?;
+    let chat_resp: ChatResponse = resp.json().await?;
+    let content = chat_resp
+        .choices
+        .into_iter()
+        .next()
+        .ok_or_else(|| anyhow::anyhow!("LLM returned empty choices"))?
+        .message
+        .content;
     tracing::info!(
         elapsed_ms = elapsed.as_millis() as u64,
-        response_len = ollama_resp.message.content.len(),
-        "Ollama response received"
+        response_len = content.len(),
+        "LLM response received"
     );
-    tracing::debug!(raw_response = %ollama_resp.message.content, "LLM raw output");
-    Ok(ollama_resp.message.content)
+    tracing::debug!(raw_response = %content, "LLM raw output");
+    Ok(content)
 }
 
 /// Parse the LLM JSON response into NormalizedFields.
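The response shape changes accordingly: Ollama's native API returns a single top-level message, while the OpenAI-style body wraps it in a choices array, so the code now takes the first choice and fails loudly when that array is empty. A self-contained sketch of the extraction path (the sample body is invented; assumes serde, serde_json, and anyhow as dependencies):

    use serde::Deserialize;

    #[derive(Deserialize)]
    struct ChatResponse {
        choices: Vec<ChatChoice>,
    }

    #[derive(Deserialize)]
    struct ChatChoice {
        message: ChatResponseMessage,
    }

    #[derive(Deserialize)]
    struct ChatResponseMessage {
        content: String,
    }

    fn main() -> anyhow::Result<()> {
        // Unknown fields such as "role" are ignored by serde's defaults.
        let body = r#"{"choices":[{"message":{"role":"assistant","content":"hello"}}]}"#;
        let resp: ChatResponse = serde_json::from_str(body)?;
        let content = resp
            .choices
            .into_iter()
            .next()
            .ok_or_else(|| anyhow::anyhow!("LLM returned empty choices"))?
            .message
            .content;
        assert_eq!(content, "hello");
        Ok(())
    }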