diff --git a/USAGE.md b/USAGE.md index 25f8ca38..873dde91 100644 --- a/USAGE.md +++ b/USAGE.md @@ -245,6 +245,7 @@ export ANTHROPIC_AUTH_TOKEN="anthropic-oauth-or-proxy-bearer-token" | `sk-ant-*` API key | `ANTHROPIC_API_KEY` | `x-api-key: sk-ant-...` | [console.anthropic.com](https://console.anthropic.com) | | OAuth access token (opaque) | `ANTHROPIC_AUTH_TOKEN` | `Authorization: Bearer ...` | an Anthropic-compatible proxy or OAuth flow that mints bearer tokens | | OpenRouter key (`sk-or-v1-*`) | `OPENAI_API_KEY` + `OPENAI_BASE_URL=https://openrouter.ai/api/v1` | `Authorization: Bearer ...` | [openrouter.ai/keys](https://openrouter.ai/keys) | +| Ollama local instance | `OLLAMA_HOST` | placeholder `Authorization: Bearer ollama` (no real key needed) | local Ollama server at `http://127.0.0.1:11434` | **Why this matters:** if you paste an `sk-ant-*` key into `ANTHROPIC_AUTH_TOKEN`, Anthropic's API will return `401 Invalid bearer token` because `sk-ant-*` keys are rejected over the Bearer header. The fix is a one-line env var swap — move the key to `ANTHROPIC_API_KEY`. Recent `claw` builds detect this exact shape (401 + `sk-ant-*` in the Bearer slot) and append a hint to the error message pointing at the fix. @@ -305,18 +306,18 @@ cd rust ### Ollama ```bash -export OPENAI_BASE_URL="http://127.0.0.1:11434/v1" -unset OPENAI_API_KEY +export OLLAMA_HOST="http://127.0.0.1:11434" cd rust ./target/debug/claw --model "llama3.2" prompt "summarize this repository in one sentence" ``` -For Ollama tags with punctuation (for example `qwen2.5-coder:7b`), `OPENAI_BASE_URL` selects the local OpenAI-compatible route even when `OPENAI_API_KEY` is unset: +`OLLAMA_HOST` is the preferred env var. Claw routes all models to the local Ollama endpoint automatically, and no API key is needed. The older `OPENAI_BASE_URL` + `OPENAI_API_KEY` workaround is also supported. 
+ +For Ollama tags with punctuation (for example `qwen2.5-coder:7b`), both approaches work: ```bash -export OPENAI_BASE_URL="http://127.0.0.1:11434/v1" -unset OPENAI_API_KEY +export OLLAMA_HOST="http://127.0.0.1:11434" cd rust ./target/debug/claw --model "qwen2.5-coder:7b" prompt "reply with ready" diff --git a/docs/local-openai-compatible-providers.md b/docs/local-openai-compatible-providers.md index aabbe696..cf3766e0 100644 --- a/docs/local-openai-compatible-providers.md +++ b/docs/local-openai-compatible-providers.md @@ -57,11 +57,12 @@ ollama serve In another shell: ```bash -export OPENAI_BASE_URL="http://127.0.0.1:11434/v1" -unset OPENAI_API_KEY +export OLLAMA_HOST="http://127.0.0.1:11434" claw --model "qwen3:latest" prompt "Reply exactly HELLO_WORLD_123" ``` +`OLLAMA_HOST` is the preferred env var for Ollama. Claw routes all models to the local OpenAI-compatible endpoint automatically when this is set, and no API key is needed. The older `OPENAI_BASE_URL` + `OPENAI_API_KEY` workaround is also supported for existing setups. + If Ollama is running without auth, `unset OPENAI_API_KEY` is acceptable. Use a placeholder token rather than a real cloud API key if your local server requires an Authorization header. ## llama.cpp server diff --git a/rust/crates/api/src/client.rs b/rust/crates/api/src/client.rs index 55d200c5..240559e8 100644 --- a/rust/crates/api/src/client.rs +++ b/rust/crates/api/src/client.rs @@ -32,16 +32,25 @@ impl ProviderClient { OpenAiCompatConfig::xai(), )?)), ProviderKind::OpenAi => { - // DashScope models (qwen-*) also return ProviderKind::OpenAi because they - // speak the OpenAI wire format, but they need the DashScope config which - // reads DASHSCOPE_API_KEY and points at dashscope.aliyuncs.com. 
- let config = match providers::metadata_for_model(&resolved_model) { - Some(meta) if meta.auth_env == "DASHSCOPE_API_KEY" => { - OpenAiCompatConfig::dashscope() - } - _ => OpenAiCompatConfig::openai(), - }; - Ok(Self::OpenAi(OpenAiCompatClient::from_env(config)?)) + // OLLAMA_HOST takes priority: local Ollama needs no API key + // and ignores DashScope/OpenAI env-based dispatch. + if std::env::var_os("OLLAMA_HOST").is_some() { + Ok(Self::OpenAi( + openai_compat::OpenAiCompatClient::from_ollama_env() + .expect("from_ollama_env always returns Some"), + )) + } else { + // DashScope models (qwen-*) also return ProviderKind::OpenAi because they + // speak the OpenAI wire format, but they need the DashScope config which + // reads DASHSCOPE_API_KEY and points at dashscope.aliyuncs.com. + let config = match providers::metadata_for_model(&resolved_model) { + Some(meta) if meta.auth_env == "DASHSCOPE_API_KEY" => { + OpenAiCompatConfig::dashscope() + } + _ => OpenAiCompatConfig::openai(), + }; + Ok(Self::OpenAi(OpenAiCompatClient::from_env(config)?)) + } } } } diff --git a/rust/crates/api/src/providers/mod.rs b/rust/crates/api/src/providers/mod.rs index f8fe6244..57dce27e 100644 --- a/rust/crates/api/src/providers/mod.rs +++ b/rust/crates/api/src/providers/mod.rs @@ -351,6 +351,11 @@ fn looks_like_local_openai_model(model: &str) -> bool { #[must_use] pub fn detect_provider_kind(model: &str) -> ProviderKind { + // OLLAMA_HOST takes priority: if set, route all models through the local + // OpenAI-compatible endpoint regardless of model name or other env vars. 
+    if std::env::var_os("OLLAMA_HOST").is_some() {
+        return ProviderKind::OpenAi;
+    }
     let resolved_model = resolve_model_alias(model);
     if let Some(metadata) = metadata_for_model(&resolved_model) {
         return metadata.provider;
diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs
index a1a82b88..7f82d00c 100644
--- a/rust/crates/api/src/providers/openai_compat.rs
+++ b/rust/crates/api/src/providers/openai_compat.rs
@@ -49,6 +49,15 @@ const XAI_MAX_REQUEST_BODY_BYTES: usize = 52_428_800; // 50MB
 const OPENAI_MAX_REQUEST_BODY_BYTES: usize = 104_857_600; // 100MB
 const DASHSCOPE_MAX_REQUEST_BODY_BYTES: usize = 6_291_456; // 6MB (observed limit in dogfood)
 
+/// OpenAI-compatible client settings for a local Ollama server.
+pub const OLLAMA_CONFIG: OpenAiCompatConfig = OpenAiCompatConfig {
+    provider_name: "Ollama",
+    api_key_env: "OLLAMA_HOST", // NOTE(review): stand-in, Ollama has no API-key variable; confirm it is never reported as a missing key
+    base_url_env: "OLLAMA_HOST",
+    default_base_url: "http://127.0.0.1:11434/v1",
+    max_request_body_bytes: OPENAI_MAX_REQUEST_BODY_BYTES,
+};
+
 impl OpenAiCompatConfig {
     #[must_use]
     pub const fn xai() -> Self {
@@ -149,6 +158,31 @@ impl OpenAiCompatClient {
         };
         Ok(Self::new(api_key, config).with_base_url(base_url))
     }
+    /// Create an Ollama client from the `OLLAMA_HOST` env var.
+    ///
+    /// Ollama requires no API key; a placeholder is used for the Authorization header.
+    /// `OLLAMA_HOST` may be a full URL or a scheme-less `host:port`; the latter gets an
+    /// `http://` prefix. An unset, empty, or non-UTF-8 value falls back to
+    /// `http://127.0.0.1:11434`. Always returns `Some`.
+    pub fn from_ollama_env() -> Option<Self> {
+        const DEFAULT_HOST: &str = "http://127.0.0.1:11434";
+        let raw = std::env::var("OLLAMA_HOST").unwrap_or_default();
+        let host = match raw.trim().trim_end_matches('/') {
+            "" => DEFAULT_HOST.to_string(),
+            url if url.contains("://") => url.to_string(),
+            // A bare `host:port` would otherwise produce a base URL with no scheme.
+            bare => format!("http://{}", bare),
+        };
+        Some(Self {
+            http: build_http_client_or_default(),
+            api_key: "ollama".to_string(),
+            config: OLLAMA_CONFIG,
+            base_url: format!("{}/v1", host),
+            max_retries: DEFAULT_MAX_RETRIES,
+            initial_backoff: DEFAULT_INITIAL_BACKOFF,
+            max_backoff: DEFAULT_MAX_BACKOFF,
+        })
+    }
 
     #[must_use]
     pub fn with_base_url(mut self, base_url: impl Into) -> Self {
diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs
index 9b1d8a74..38974eb5 100644
--- a/rust/crates/rusty-claude-cli/src/main.rs
+++ b/rust/crates/rusty-claude-cli/src/main.rs
@@ -2903,6 +2903,14 @@ fn resolve_model_alias_with_config(model: &str) -> String {
 /// Rejects: empty, whitespace-only, strings with spaces, or invalid chars.
 fn validate_model_syntax(model: &str) -> Result<(), String> {
     let trimmed = model.trim();
+    // Ollama models use names like "qwen3:8b" that don't match provider/model
+    // syntax. Skip strict validation when OLLAMA_HOST is configured.
+    if std::env::var_os("OLLAMA_HOST").is_some() {
+        if trimmed.is_empty() {
+            return Err("invalid model syntax: model string cannot be empty.\nUsage: --model e.g. --model qwen3:8b".to_string());
+        }
+        return Ok(());
+    }
     if trimmed.is_empty() {
         return Err("invalid model syntax: model string cannot be empty.\nUsage: --model e.g. --model anthropic/claude-opus-4-7".to_string());
     }
@@ -19689,4 +19697,26 @@ mod alias_resolution_tests {
         assert_eq!(resolve_model_alias_with_config(model), model);
         assert!(validate_model_syntax(model).is_ok());
     }
+    #[test]
+    fn test_ollama_host_bypasses_model_validation() {
+        // The environment is process-global: remember the caller's value and put it
+        // back before asserting, so a failed assertion cannot leak the override.
+        // NOTE(review): this does not serialize against other env-touching tests;
+        // take the suite's env lock here if one exists (not visible in this hunk).
+        let previous = std::env::var_os("OLLAMA_HOST");
+        std::env::set_var("OLLAMA_HOST", "http://127.0.0.1:11434");
+        let tagged = ["qwen3:8b", "gemma4:e2b", "qwen3.6:27b-nvfp4"]
+            .map(|model| (model, validate_model_syntax(model)));
+        let empty = validate_model_syntax("");
+        match previous {
+            Some(value) => std::env::set_var("OLLAMA_HOST", value),
+            None => std::env::remove_var("OLLAMA_HOST"),
+        }
+        // Ollama model names with colons pass
+        for (model, outcome) in &tagged {
+            assert!(outcome.is_ok(), "{} should be accepted: {:?}", model, outcome);
+        }
+        // Empty model still rejected
+        assert!(empty.is_err());
+    }
 }