PR was blocked by .github/workflows/guard-fork-dependencies.yml: fork PRs cannot modify uv.lock. Reverting: - uv.lock + pyproject.toml black bump (24.10.0 -> 26.3.1) and the 295 files of mechanical Black 26 reformat coupled to it - pyproject.toml diskcache extra change (kept the runtime mitigation in litellm/caching/disk_cache.py via JSONDisk) Kept: - Dockerfile cache narrowing (drops ~660 MB of uv build cache that surfaced cached setuptools as CVE findings) - litellm/caching/disk_cache.py: dc.JSONDisk to neutralize CVE-2025-69872 - ui/litellm-dashboard/package-lock.json + litellm-js/spend-logs/package-lock.json: next/postcss/hono/uuid CVE bumps (these are not blocked by the fork guard) - tests/test_litellm/caching/test_disk_cache.py - tests/code_coverage_tests/liccheck.ini: harmless black authorization Black + gitpython + langchain dep upgrades will need a follow-up from a maintainer pushing a branch in the canonical BerriAI/litellm repo. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
46 lines
1.2 KiB
Python
46 lines
1.2 KiB
Python
"""
|
|
Auto-detect content type per message: code, JSON, or text.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
|
|
|
|
_CODE_KEYWORDS = re.compile(
|
|
r"\b(?:def |function |class |import |from |require\(|#include|fn |func |const |let |var |public |private |static )\b"
|
|
)
|
|
|
|
|
|
def detect_content_type(content: str) -> str:
    """
    Classify a message body as code, JSON, or plain text.

    Leading and trailing whitespace is ignored. Content that starts with
    ``{`` or ``[`` and parses with ``json.loads`` is reported as JSON.
    Otherwise the first 5000 characters are scanned for two code signals:
    matches of the module-level ``_CODE_KEYWORDS`` pattern, and non-blank
    lines that begin with a space or a tab.

    Args:
        content: Raw message text to classify.

    Returns:
        One of: "code", "json", "text". Empty or whitespace-only content
        is reported as "text".
    """
    stripped = content.strip()
    if not stripped:
        return "text"

    # Check JSON: only attempted for object/array openers, so bare scalars
    # such as "42" or "true" are never reported as JSON.
    if stripped[0] in ("{", "["):
        try:
            json.loads(stripped)
        except (ValueError, RecursionError):
            # ValueError covers json.JSONDecodeError (its subclass).
            # RecursionError is raised by the parser on extremely deep
            # nesting; treat that as "not JSON" instead of crashing and let
            # the heuristics below decide.
            pass
        else:
            return "json"

    # Check code indicators
    # Sample first 5000 chars for performance
    sample = stripped[:5000]
    keyword_matches = len(_CODE_KEYWORDS.findall(sample))
    lines = sample.split("\n")
    indented_lines = sum(
        1 for line in lines if line.startswith((" ", "\t")) and line.strip()
    )

    # If we see multiple code keywords or significant indentation, it's likely
    # code. The indentation rule needs more than 5 lines so short snippets of
    # indented prose are not misclassified.
    if keyword_matches >= 3 or (indented_lines > len(lines) * 0.3 and len(lines) > 5):
        return "code"

    return "text"
|