fix: update litellm mainstream models registration and gateway defaults

2026-06-27 14:49:08 +08:00 · 2026-06-27 14:49:08 +08:00 · d806ba9d3d
commit d806ba9d3d
parent a2ce5b9d05
2 changed files with 99 additions and 16 deletions
--- a/roles/vhosts/gateway_openclaw/defaults/main.yml
+++ b/roles/vhosts/gateway_openclaw/defaults/main.yml
@ -65,10 +65,10 @@ gateway_openclaw_fallbacks_deepseek:
 gateway_openclaw_fallbacks_nvidia:
  - "nvidia/deepseek-v4-flash"
  - "nvidia/deepseek-v4-pro"
-  - "nvidia/glm-5.2"
+  - "nvidia/glm-5.1"
  - "nvidia/minimax-m3"
  - "nvidia/qwen3.5"
-  - "nvidia/kimi-k2.7-code"
+  - "nvidia/kimi-k2.6"
 gateway_openclaw_fallbacks_ollama:
  - "ollama/deepseek-v4-flash"
  - "ollama/deepseek-v4-pro"
@ -96,10 +96,10 @@ gateway_openclaw_default_models_deepseek:
 gateway_openclaw_default_models_nvidia:
  "nvidia/deepseek-v4-flash": {}
  "nvidia/deepseek-v4-pro": {}
-  "nvidia/glm-5.2": {}
+  "nvidia/glm-5.1": {}
  "nvidia/minimax-m3": {}
  "nvidia/qwen3.5": {}
-  "nvidia/kimi-k2.7-code": {}
+  "nvidia/kimi-k2.6": {}
 gateway_openclaw_default_models_ollama:
  "ollama/deepseek-v4-flash": {}
  "ollama/deepseek-v4-pro": {}
--- a/roles/vhosts/litellm/files/register_mainstream_models.sh
+++ b/roles/vhosts/litellm/files/register_mainstream_models.sh
@ -20,6 +20,9 @@ fi

 echo "[INFO] Using LiteLLM URL: $LITELLM_URL"

+# Aliases successfully registered, collected for the post-registration probe.
+REGISTERED=()
+
 # Function to add a model
 add_model() {
    local alias_name="$1"
@ -79,6 +82,7 @@ EOF
    http_code=$(echo "$response" | grep -Eo 'HTTP_CODE:[0-9]{3}' | cut -d':' -f2 || echo "000")
    if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
        echo "[SUCCESS] Model $alias_name added."
+        REGISTERED+=("$alias_name")
    else
        echo "[INFO] Model $alias_name failed to add via /model/new (HTTP $http_code), attempting /model/update..."
        response=$(curl -s -w "\nHTTP_CODE:%{http_code}" -X POST "$LITELLM_URL/model/update" \
@ -88,6 +92,7 @@ EOF
        http_code=$(echo "$response" | grep -Eo 'HTTP_CODE:[0-9]{3}' | cut -d':' -f2 || echo "000")
        if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
            echo "[SUCCESS] Model $alias_name updated."
+            REGISTERED+=("$alias_name")
        else
            echo "[ERROR] Failed to add/update model $alias_name. HTTP Code: $http_code"
            echo "Response: $response"
@ -95,6 +100,36 @@ EOF
    fi
 }

+# Probe a single registered alias by sending a real 1-token completion through
+# LiteLLM. Registration (presence in /v1/models) only proves the row exists in
+# the DB; it does NOT prove the upstream model id / api_base / entitlement are
+# valid. This is the only check that proves an alias is actually callable.
+# Echoes "PASS" / "FAIL <http> <reason>" and returns 0 only on PASS.
+probe_model() {
+    # Argument: $1 - the LiteLLM alias to probe (sent as the "model" field).
+    # Reads the globals LITELLM_URL and LITELLM_TOKEN.
+    local alias_name="$1"
+    local body http_code msg
+    # -m 60 bounds the whole request to 60 seconds; -w appends a final
+    # "HTTP_CODE:<status>" line so body and status arrive in one capture.
+    # `|| true` keeps a non-zero curl exit (e.g. timeout, refused connection)
+    # from failing this assignment.
+    body=$(curl -s -m 60 -w "\nHTTP_CODE:%{http_code}" \
+        -X POST "$LITELLM_URL/v1/chat/completions" \
+        -H "Authorization: Bearer $LITELLM_TOKEN" \
+        -H "Content-Type: application/json" \
+        -d "{\"model\":\"$alias_name\",\"messages\":[{\"role\":\"user\",\"content\":\"ping\"}],\"max_tokens\":1}") || true
+    # Extract the three-digit status from the HTTP_CODE marker.
+    # NOTE(review): `|| echo "000"` only runs if the pipeline reports failure.
+    # Without `pipefail` the pipeline status is cut's, so an unmatched grep
+    # would leave http_code empty rather than "000" - confirm the script
+    # enables pipefail.
+    http_code=$(echo "$body" | grep -Eo 'HTTP_CODE:[0-9]{3}' | cut -d':' -f2 || echo "000")
+    # Only an exact 200 counts as callable; every other status is a FAIL.
+    if [ "$http_code" = "200" ]; then
+        echo "PASS"
+        return 0
+    fi
+    # Pull a short reason out of the error for the report. Prefer the upstream
+    # provider message (e.g. "this model requires a subscription") over
+    # LiteLLM's verbose fallback-wrapper text, then cap the length.
+    # Extraction order: (1) the first single-quoted 'error': '...' field,
+    # (2) the first JSON "message":"..." field, (3) the whole flattened body.
+    local flat
+    # Drop the HTTP_CODE marker and join lines so the greps see one line.
+    flat=$(echo "$body" | sed 's/HTTP_CODE:[0-9]*//' | tr '\n' ' ')
+    msg=$(echo "$flat" | grep -Eo "'error': '[^']*'" | head -1 | sed "s/'error': '//; s/'$//")
+    [ -z "$msg" ] && msg=$(echo "$flat" | grep -Eo '"message":"[^"]*"' | head -1 | cut -d'"' -f4)
+    [ -z "$msg" ] && msg="$flat"
+    # "unknown" is used only when even the flattened body is empty; the reason
+    # is truncated to 90 characters to keep the report to one line.
+    echo "FAIL $http_code $(echo "${msg:-unknown}" | cut -c1-90)"
+    return 1
+}
+
 if [ -n "${DEEPSEEK_API_KEY:-}" ]; then
    echo "========================================="
    echo "Registering DeepSeek Models..."
@ -109,12 +144,18 @@ if [ -n "${NVIDIA_API_KEY:-}" ]; then
    echo "========================================="
    echo "Registering NVIDIA Build Models..."
    echo "========================================="
-    add_model "nvidia/deepseek-v4-flash" "openai/deepseek-v4-flash" "NVIDIA_API_KEY" "https://integrate.api.nvidia.com/v1"
-    add_model "nvidia/deepseek-v4-pro" "openai/deepseek-v4-pro" "NVIDIA_API_KEY" "https://integrate.api.nvidia.com/v1"
-    add_model "nvidia/glm-5.2" "openai/thudm/glm-5.2-chat" "NVIDIA_API_KEY" "https://integrate.api.nvidia.com/v1"
-    add_model "nvidia/minimax-m3" "openai/minimax/minimax-m3" "NVIDIA_API_KEY" "https://integrate.api.nvidia.com/v1"
-    add_model "nvidia/qwen3.5" "openai/alibaba/qwen3.5-72b-instruct" "NVIDIA_API_KEY" "https://integrate.api.nvidia.com/v1"
-    add_model "nvidia/kimi-k2.7-code" "openai/moonshot/kimi-k2.7-code" "NVIDIA_API_KEY" "https://integrate.api.nvidia.com/v1"
+    # NVIDIA NIM model ids are vendor-namespaced (deepseek-ai/..., minimaxai/...,
+    # qwen/..., z-ai/..., moonshotai/...); bare names 404 on the upstream router.
+    # Every alias below maps to a model that EXISTS in the live GET /v1/models
+    # catalog. NVIDIA serves glm-5.1 and kimi-k2.6 (no 5.2 / k2.7), so the
+    # aliases are named for the real versions rather than lying about them.
+    NVIDIA_API_BASE="${NVIDIA_API_BASE:-https://integrate.api.nvidia.com/v1}"
+    add_model "nvidia/deepseek-v4-flash" "openai/deepseek-ai/deepseek-v4-flash" "NVIDIA_API_KEY" "$NVIDIA_API_BASE"
+    add_model "nvidia/deepseek-v4-pro" "openai/deepseek-ai/deepseek-v4-pro" "NVIDIA_API_KEY" "$NVIDIA_API_BASE"
+    add_model "nvidia/glm-5.1" "openai/z-ai/glm-5.1" "NVIDIA_API_KEY" "$NVIDIA_API_BASE"
+    add_model "nvidia/minimax-m3" "openai/minimaxai/minimax-m3" "NVIDIA_API_KEY" "$NVIDIA_API_BASE"
+    add_model "nvidia/qwen3.5" "openai/qwen/qwen3.5-397b-a17b" "NVIDIA_API_KEY" "$NVIDIA_API_BASE"
+    add_model "nvidia/kimi-k2.6" "openai/moonshotai/kimi-k2.6" "NVIDIA_API_KEY" "$NVIDIA_API_BASE"
 fi

 echo "========================================="
@ -149,13 +190,55 @@ if [ -n "${OLLAMA_API_KEY:-}" ]; then
    echo "Registering OLLAMA Cloud Models..."
    echo "========================================="
    OLLAMA_API_BASE="${OLLAMA_API_BASE:-https://api.ollama.cloud/v1}"
-    add_model "ollama/deepseek-v4-flash" "openai/deepseek-v4-flash" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
-    add_model "ollama/deepseek-v4-pro" "openai/deepseek-v4-pro" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
-    add_model "ollama/glm-5.2" "openai/thudm/glm-5.2-chat" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
-    add_model "ollama/minimax-m3" "openai/minimax/minimax-m3" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
-    add_model "ollama/qwen3.5" "openai/alibaba/qwen3.5-72b-instruct" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
-    add_model "ollama/kimi-k2.7-code" "openai/moonshot/kimi-k2.7-code" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
+    # Ollama Cloud model ids carry a tag (":cloud" for the hosted big models),
+    # per https://ollama.com/search. Bare (untagged) names resolve to a local
+    # pull that the cloud endpoint does not have -> 404 "model not found".
+    # NOTE: the :cloud models require an Ollama paid subscription; without one
+    # the upstream returns 403. The verification pass at the end will surface
+    # this clearly (a 403 here is an upstream entitlement issue, not a config
+    # bug; a 404 means the model id itself is wrong).
+    add_model "ollama/deepseek-v4-flash" "openai/deepseek-v4-flash:cloud" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
+    add_model "ollama/deepseek-v4-pro" "openai/deepseek-v4-pro:cloud" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
+    add_model "ollama/glm-5.2" "openai/glm-5.2:cloud" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
+    add_model "ollama/minimax-m3" "openai/minimax-m3:cloud" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
+    add_model "ollama/qwen3.5" "openai/qwen3.5:cloud" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
+    add_model "ollama/kimi-k2.7-code" "openai/kimi-k2.7-code:cloud" "OLLAMA_API_KEY" "$OLLAMA_API_BASE"
 fi

 echo "All models requested have been registered."
 echo "You can check them at $LITELLM_URL/ui/?page=models"
+
+# =============================================================================
+# Verification pass: prove callability, not mere presence in /v1/models.
+# Sends a real 1-token completion through LiteLLM for every registered alias and
+# prints a PASS/FAIL health table. Controlled by REGISTER_MODELS_VERIFY (default
+# on); set REGISTER_MODELS_VERIFY=0 to skip. A FAIL here is the real signal that
+# a fallback link is unhealthy even though it shows up in /v1/models.
+# =============================================================================
+if [ "${REGISTER_MODELS_VERIFY:-1}" != "0" ] && [ "${#REGISTERED[@]}" -gt 0 ]; then
+    # Runs unless REGISTER_MODELS_VERIFY is exactly "0", and only when at
+    # least one alias was recorded in REGISTERED.
+    echo "========================================="
+    echo "Verifying callability (1-token live probe per alias)..."
+    echo "========================================="
+    # Tallies and the names of failing aliases for the summary below.
+    pass_count=0
+    fail_count=0
+    fail_list=()
+    for alias_name in "${REGISTERED[@]}"; do
+        # `|| true` keeps the non-zero FAIL return from tripping `set -e`.
+        # Only probe_model's stdout is inspected: exactly "PASS" means
+        # callable, anything else is treated as a failure line.
+        result="$(probe_model "$alias_name" || true)"
+        if [ "$result" = "PASS" ]; then
+            printf '  [PASS] %s\n' "$alias_name"
+            pass_count=$((pass_count + 1))
+        else
+            # %-28s left-justifies the alias in a 28-column field;
+            # ${result#FAIL } strips the leading "FAIL " so only the status
+            # and reason are printed after the alias.
+            printf '  [FAIL] %-28s %s\n' "$alias_name" "${result#FAIL }"
+            fail_count=$((fail_count + 1))
+            fail_list+=("$alias_name")
+        fi
+    done
+    echo "-----------------------------------------"
+    echo "Callable: $pass_count   Unhealthy: $fail_count   (of ${#REGISTERED[@]} registered)"
+    # NOTE(review): failures are only reported; nothing in this block changes
+    # the script's exit status - confirm that is intended for automation.
+    if [ "$fail_count" -gt 0 ]; then
+        echo "Unhealthy aliases (registered but NOT callable): ${fail_list[*]}"
+        echo "These appear in /v1/models but fail a real call — check upstream"
+        echo "model id, api_base, and account entitlement (e.g. 403 = subscription)."
+    fi
+fi