feat: Add line_profiler support for performance analysis and fix Windows CRLF issues in Docker builds (#18773)

This commit is contained in:
Alexsander Hamir 2026-01-07 11:36:57 -08:00 committed by GitHub
parent ea8a94988f
commit 1544e8f971
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 412 additions and 19 deletions

View File

@ -20,7 +20,8 @@ RUN python -m pip install build
COPY . .
# Build Admin UI
RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Convert Windows line endings to Unix and make executable
RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Build the package
RUN rm -rf dist/* && python -m build
@ -65,12 +66,14 @@ RUN find /usr/lib -type f -path "*/tornado/test/*" -delete && \
find /usr/lib -type d -path "*/tornado/test" -delete
# Install semantic_router and aurelio-sdk using script
RUN chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh
# Convert Windows line endings to Unix and make executable
RUN sed -i 's/\r$//' docker/install_auto_router.sh && chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh
# Generate prisma client
RUN prisma generate
RUN chmod +x docker/entrypoint.sh
RUN chmod +x docker/prod_entrypoint.sh
# Convert Windows line endings to Unix for entrypoint scripts
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
EXPOSE 4000/tcp

View File

@ -8,7 +8,8 @@ WORKDIR /app
COPY config.yaml .
# Make sure your docker/entrypoint.sh is executable
RUN chmod +x docker/entrypoint.sh
# Convert Windows line endings to Unix
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
# Expose the necessary port
EXPOSE 4000/tcp

View File

@ -46,8 +46,9 @@ COPY --from=builder /wheels/ /wheels/
# Install the built wheel using pip; again using a wildcard if it's the only file
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
RUN chmod +x docker/entrypoint.sh
RUN chmod +x docker/prod_entrypoint.sh
# Convert Windows line endings to Unix for entrypoint scripts
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
EXPOSE 4000/tcp

View File

@ -32,8 +32,9 @@ RUN rm -rf /app/litellm/proxy/_experimental/out/* && \
WORKDIR /app
# Make sure your docker/entrypoint.sh is executable
RUN chmod +x docker/entrypoint.sh
RUN chmod +x docker/prod_entrypoint.sh
# Convert Windows line endings to Unix for entrypoint scripts
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
# Expose the necessary port
EXPOSE 4000/tcp

View File

@ -27,7 +27,8 @@ RUN python -m pip install build
COPY . .
# Build Admin UI
RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Convert Windows line endings to Unix and make executable
RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Build the package
RUN rm -rf dist/* && python -m build
@ -63,20 +64,23 @@ COPY --from=builder /wheels/ /wheels/
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
# Install semantic_router and aurelio-sdk using script
RUN chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh
# Convert Windows line endings to Unix and make executable
RUN sed -i 's/\r$//' docker/install_auto_router.sh && chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh
# ensure pyjwt is used, not jwt
RUN pip uninstall jwt -y
RUN pip uninstall PyJWT -y
RUN pip install PyJWT==2.9.0 --no-cache-dir
# Build Admin UI
RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Build Admin UI (runtime stage)
# Convert Windows line endings to Unix and make executable
RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Generate prisma client
RUN prisma generate
RUN chmod +x docker/entrypoint.sh
RUN chmod +x docker/prod_entrypoint.sh
# Convert Windows line endings to Unix for entrypoint scripts
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
EXPOSE 4000/tcp
RUN apk add --no-cache supervisor

View File

@ -40,7 +40,8 @@ COPY enterprise/ ./enterprise/
COPY docker/ ./docker/
# Build Admin UI once
RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Convert Windows line endings to Unix and make executable
RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Build the package
RUN rm -rf dist/* && python -m build
@ -79,8 +80,12 @@ RUN pip install --no-cache-dir *.whl /wheels/* --no-index --find-links=/wheels/
rm -rf /wheels
# Generate prisma client and set permissions
# Convert Windows line endings to Unix for entrypoint scripts
RUN prisma generate && \
chmod +x docker/entrypoint.sh docker/prod_entrypoint.sh
sed -i 's/\r$//' docker/entrypoint.sh && \
sed -i 's/\r$//' docker/prod_entrypoint.sh && \
chmod +x docker/entrypoint.sh && \
chmod +x docker/prod_entrypoint.sh
EXPOSE 4000/tcp

View File

@ -144,7 +144,10 @@ RUN pip install --no-index --find-links=/wheels/ -r requirements.txt && \
fi
# Permissions, cleanup, and Prisma prep
RUN chmod +x docker/entrypoint.sh docker/prod_entrypoint.sh && \
# Convert Windows line endings to Unix for entrypoint scripts
RUN sed -i 's/\r$//' docker/entrypoint.sh && \
sed -i 's/\r$//' docker/prod_entrypoint.sh && \
chmod +x docker/entrypoint.sh docker/prod_entrypoint.sh && \
mkdir -p /nonexistent /.npm /var/lib/litellm/assets /var/lib/litellm/ui && \
chown -R nobody:nogroup /app /var/lib/litellm/ui /var/lib/litellm/assets /nonexistent /.npm && \
pip uninstall jwt -y || true && \

View File

@ -0,0 +1,214 @@
# Performance Utilities Documentation
This module provides performance monitoring and profiling functionality for LiteLLM proxy server using `cProfile` and `line_profiler`.
## Table of Contents
- [Line Profiler Usage](#line-profiler-usage)
- [Example 1: Wrapping a function directly](#example-1-wrapping-a-function-directly)
- [Example 2: Wrapping a module function dynamically](#example-2-wrapping-a-module-function-dynamically)
- [Example 3: Manual stats collection](#example-3-manual-stats-collection)
- [Example 4: Analyzing the profile output](#example-4-analyzing-the-profile-output)
- [Example 5: Using in a decorator pattern](#example-5-using-in-a-decorator-pattern)
- [cProfile Usage](#cprofile-usage)
- [Installation](#installation)
- [Notes](#notes)
- [API Reference](#api-reference)
## Line Profiler Usage
### Example 1: Wrapping a function directly
This is how it's used in `litellm/utils.py` to profile `wrapper_async`:
```python
from litellm.proxy.common_utils.performance_utils import (
register_shutdown_handler,
wrap_function_directly,
)
def client(original_function):
@wraps(original_function)
async def wrapper_async(*args, **kwargs):
# ... function implementation ...
pass
# Wrap the function with line_profiler
wrapper_async = wrap_function_directly(wrapper_async)
# Register shutdown handler to collect stats on server shutdown
register_shutdown_handler(output_file="wrapper_async_line_profile.lprof")
return wrapper_async
```
### Example 2: Wrapping a module function dynamically
```python
import my_module
from litellm.proxy.common_utils.performance_utils import (
wrap_function_with_line_profiler,
register_shutdown_handler,
)
# Wrap a function in a module
wrap_function_with_line_profiler(my_module, "expensive_function")
# Register shutdown handler
register_shutdown_handler(output_file="my_profile.lprof")
# Now all calls to my_module.expensive_function will be profiled
my_module.expensive_function()
```
### Example 3: Manual stats collection
```python
from litellm.proxy.common_utils.performance_utils import (
wrap_function_directly,
collect_line_profiler_stats,
)
def my_function():
# ... implementation ...
pass
# Wrap the function
my_function = wrap_function_directly(my_function)
# Run your code
my_function()
# Collect stats manually (instead of waiting for shutdown)
collect_line_profiler_stats(output_file="manual_profile.lprof")
```
### Example 4: Analyzing the profile output
After running your code, analyze the `.lprof` file:
```bash
# View the profile
python -m line_profiler wrapper_async_line_profile.lprof
# Save to text file
python -m line_profiler wrapper_async_line_profile.lprof > profile_report.txt
```
The output shows:
- **Line #**: Line number in the source file
- **Hits**: Number of times the line was executed
- **Time**: Total time spent on that line, in timer units (microseconds when the report header reads `Timer unit: 1e-06 s`, as in the example below)
- **Per Hit**: Average time per execution
- **% Time**: Percentage of total function time
- **Line Contents**: The actual source code
Example output:
```
Timer unit: 1e-06 s
Total time: 3.73697 s
File: litellm/utils.py
Function: client.<locals>.wrapper_async at line 1657
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1657 @wraps(original_function)
1658 async def wrapper_async(*args, **kwargs):
1659 2005 7577.1 3.8 0.2 print_args_passed_to_litellm(...)
1763 2005 1351909.0 674.3 36.2 result = await original_function(*args, **kwargs)
1846 4010 1543688.1 385.0 41.3 update_response_metadata(...)
```
### Example 5: Using in a decorator pattern
```python
from litellm.proxy.common_utils.performance_utils import (
wrap_function_directly,
register_shutdown_handler,
)
def profile_decorator(func):
# Wrap the function
profiled_func = wrap_function_directly(func)
# Register shutdown handler (only once)
if not hasattr(profile_decorator, '_registered'):
register_shutdown_handler(output_file="decorated_functions.lprof")
profile_decorator._registered = True
return profiled_func
@profile_decorator
async def my_async_function():
# This function will be profiled
pass
```
## cProfile Usage
### Example: Using the profile_endpoint decorator
```python
from litellm.proxy.common_utils.performance_utils import profile_endpoint
@profile_endpoint(sampling_rate=0.1) # Profile 10% of requests
async def my_endpoint():
# ... implementation ...
pass
```
The `sampling_rate` parameter controls what percentage of requests are profiled:
- `1.0`: Profile all requests (100%)
- `0.1`: Profile 1 in 10 requests (10%)
- `0.0`: Profile no requests (0%)
## Installation
`line_profiler` must be installed to use the line profiling functionality:
```bash
pip install line_profiler
```
On Windows with Python 3.14+, you may need to install Microsoft Visual C++ Build Tools to compile `line_profiler` from source.
## Notes
- The profiler aggregates stats by source code location, so multiple instances of the same function (e.g., closures) will be profiled together
- Stats are automatically collected on server shutdown via `atexit` handler when using `register_shutdown_handler()`
- You can also manually collect stats using `collect_line_profiler_stats()`
- The line profiler will fail with an `ImportError` if `line_profiler` is not installed (as configured in `litellm/utils.py`)
## API Reference
### `wrap_function_directly(func: Callable) -> Callable`
Wrap a function directly with line_profiler. This is the recommended way to profile functions, especially closures or functions created dynamically.
**Raises:**
- `ImportError`: If line_profiler is not available
- `RuntimeError`: If line_profiler cannot be enabled or function cannot be wrapped
### `wrap_function_with_line_profiler(module: Any, function_name: str) -> bool`
Dynamically wrap a function in a module with line_profiler.
**Returns:** `True` if wrapping was successful, `False` otherwise
### `collect_line_profiler_stats(output_file: Optional[str] = None) -> None`
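Collect and save line_profiler statistics. If `output_file` is provided, the stats are saved to that file. Otherwise, the formatted report is written to the proxy logger (`verbose_proxy_logger`) at INFO level rather than printed directly to stdout.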
### `register_shutdown_handler(output_file: Optional[str] = None) -> None`
Register an `atexit` handler that will automatically save profiling statistics when the Python process exits. Safe to call multiple times (only registers once).
**Default output file:** `line_profile_stats.lprof` if not specified
### `profile_endpoint(sampling_rate: float = 1.0)`
Decorator to sample endpoint hits and save to a profile file using cProfile.
**Args:**
- `sampling_rate`: Rate of requests to profile (0.0 to 1.0)

View File

@ -2,14 +2,19 @@
Performance utilities for LiteLLM proxy server.
This module provides performance monitoring and profiling functionality for endpoint
performance analysis using cProfile with configurable sampling rates.
performance analysis using cProfile with configurable sampling rates, and line_profiler
for line-by-line profiling.
See performance_utils.md for detailed usage examples and documentation.
"""
import asyncio
import atexit
import cProfile
import functools
import threading
from pathlib import Path as PathLib
from typing import Any, Callable, Optional
from litellm._logging import verbose_proxy_logger
@ -20,6 +25,11 @@ _last_profile_file_path = None
_sample_counter = 0
_sample_counter_lock = threading.Lock()
# Global line_profiler state
_line_profiler: Optional[Any] = None
_line_profiler_lock = threading.Lock()
_wrapped_functions: dict[str, Callable] = {} # Store original functions
def _should_sample(profile_sampling_rate: float) -> bool:
"""Determine if current request should be sampled based on sampling rate."""
@ -123,3 +133,154 @@ def profile_endpoint(sampling_rate: float = 1.0):
raise
return sync_wrapper
return decorator
def enable_line_profiler() -> None:
    """Create the shared ``LineProfiler`` instance if none exists yet.

    The instance is stored in the module-level ``_line_profiler`` global.
    Calls made after the profiler has been created leave it untouched.

    Raises:
        ImportError: If the ``line_profiler`` package cannot be imported.
    """
    global _line_profiler

    # Imported here rather than at module level so that this module can be
    # loaded without line_profiler; the ImportError only surfaces on use.
    from line_profiler import LineProfiler

    with _line_profiler_lock:
        if _line_profiler is not None:
            # Already initialised by an earlier call; nothing to do.
            return
        _line_profiler = LineProfiler()
        verbose_proxy_logger.info("Line profiler enabled")
def wrap_function_with_line_profiler(module: Any, function_name: str) -> bool:
    """Replace ``module.<function_name>`` with a line-profiled version.

    The attribute is looked up on ``module``, wrapped with the shared
    ``LineProfiler`` instance, and written back with ``setattr`` so that later
    lookups through the module get the profiled callable. The first original
    seen for a given ``function_name`` is remembered in ``_wrapped_functions``.

    Args:
        module: The module (or any object) holding the function as an attribute.
        function_name: Name of the attribute to wrap.

    Returns:
        True if the function was wrapped and re-installed on ``module``.
        False if line_profiler is unavailable, the attribute is missing (or
        None), or wrapping raised an exception.
    """
    # enable_line_profiler() returns None and reports failure by raising, so
    # its return value must not be used as a success flag (doing so made this
    # function return False unconditionally).
    try:
        enable_line_profiler()
    except ImportError as e:
        verbose_proxy_logger.warning(
            f"line_profiler is not available, cannot wrap {function_name}: {e}"
        )
        return False

    profiler = _line_profiler
    if profiler is None:
        return False

    try:
        original_function = getattr(module, function_name, None)
        if original_function is None:
            verbose_proxy_logger.warning(
                f"Function {function_name} not found in module {module.__name__}"
            )
            return False

        # Store original function if not already wrapped
        if function_name not in _wrapped_functions:
            _wrapped_functions[function_name] = original_function

        # Wrap with line_profiler and install the wrapper on the module
        profiled_function = profiler(original_function)
        setattr(module, function_name, profiled_function)
        verbose_proxy_logger.info(
            f"Wrapped {module.__name__}.{function_name} with line_profiler"
        )
        return True
    except Exception as e:
        # Best-effort API: profiling problems are reported, never propagated.
        verbose_proxy_logger.error(
            f"Error wrapping {function_name} with line_profiler: {e}"
        )
        return False
def wrap_function_directly(func: Callable) -> Callable:
    """Wrap a callable with the shared line profiler and return the wrapper.

    This is the recommended way to profile functions, especially closures or
    functions created dynamically (like wrapper_async in litellm/utils.py),
    because it needs no module attribute to patch.

    Args:
        func: The function to wrap.

    Returns:
        The profiled wrapper produced by the shared ``LineProfiler`` instance.

    Raises:
        ImportError: If line_profiler is not available.
        RuntimeError: If the profiler is still uninitialised after enabling.
    """
    import warnings

    enable_line_profiler()  # Will raise ImportError if not available

    profiler = _line_profiler
    if profiler is None:
        raise RuntimeError("Line profiler was not initialized")

    # Suppress warnings about __wrapped__ - we intentionally want to profile
    # the wrapper itself. The filter is scoped to this block only.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message=".*__wrapped__.*", category=UserWarning
        )
        # Add function to line_profiler and wrap it
        profiler.add_function(func)
        profiled_function = profiler(func)

    # Not every callable has __name__ (e.g. functools.partial objects); fall
    # back to repr() so a successful wrap is not turned into an AttributeError
    # by the log line.
    func_label = getattr(func, "__name__", None) or repr(func)
    verbose_proxy_logger.info(
        f"Wrapped function {func_label} with line_profiler"
    )
    return profiled_function
def collect_line_profiler_stats(output_file: Optional[str] = None) -> None:
    """Write out whatever the shared line profiler has recorded so far.

    May be called manually at any point; it is also what the handler installed
    by register_shutdown_handler() invokes at interpreter exit.

    Args:
        output_file: Path to dump the stats to. When it is None or empty, the
            formatted report is emitted through ``verbose_proxy_logger`` at
            INFO level instead of being written to a file.

    Errors raised while dumping or formatting are logged, not propagated.
    """
    with _line_profiler_lock:
        profiler = _line_profiler
        if profiler is None:
            verbose_proxy_logger.debug("Line profiler not enabled, nothing to collect")
            return

        try:
            if not output_file:
                # No destination given: render the report into memory and log it.
                from io import StringIO

                buffer = StringIO()
                profiler.print_stats(stream=buffer)
                report = buffer.getvalue()
                verbose_proxy_logger.info("Line profiler stats:\n" + report)
                return

            # Destination given: dump the stats to that path.
            destination = PathLib(output_file)
            profiler.dump_stats(str(destination))
            verbose_proxy_logger.info(
                f"Line profiler stats saved to {destination}"
            )
        except Exception as e:
            verbose_proxy_logger.error(f"Error collecting line profiler stats: {e}")
def register_shutdown_handler(output_file: Optional[str] = None) -> None:
    """Register an atexit handler that saves line_profiler stats on exit.

    The handler calls collect_line_profiler_stats() with ``output_file`` when
    the Python process exits. Safe to call multiple times: a handler is
    registered at most once per distinct ``output_file``, so repeated calls
    with the same path (e.g. from a decorator applied to many functions) do
    not stack duplicate handlers.

    Args:
        output_file: Optional path to save stats on shutdown.
            Defaults to 'line_profile_stats.lprof'.
    """
    if output_file is None:
        output_file = "line_profile_stats.lprof"

    # De-duplication state lives on the function object so that this block
    # does not depend on any additional module-level global.
    registered_outputs = register_shutdown_handler.__dict__.setdefault(
        "_registered_outputs", set()
    )
    if output_file in registered_outputs:
        # A handler for this file already exists; a second one would only
        # rewrite the same stats to the same path at exit.
        return
    registered_outputs.add(output_file)

    def shutdown_handler():
        collect_line_profiler_stats(output_file=output_file)

    atexit.register(shutdown_handler)
    verbose_proxy_logger.debug(f"Registered line_profiler shutdown handler for {output_file}")