feat: Add line_profiler support for performance analysis and fix Windows CRLF issues in Docker builds (#18773)

2026-01-07 11:36:57 -08:00 · 2026-01-07 11:36:57 -08:00 · 1544e8f971
commit 1544e8f971
parent ea8a94988f
9 changed files with 412 additions and 19 deletions
--- a/11
+++ b/11
@ -20,7 +20,8 @@ RUN python -m pip install build
 COPY . .

 # Build Admin UI
-RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
+# Convert Windows line endings to Unix and make executable
+RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh

 # Build the package
 RUN rm -rf dist/* && python -m build
@ -65,12 +66,14 @@ RUN find /usr/lib -type f -path "*/tornado/test/*" -delete && \
    find /usr/lib -type d -path "*/tornado/test" -delete

 # Install semantic_router and aurelio-sdk using script
-RUN chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh
+# Convert Windows line endings to Unix and make executable
+RUN sed -i 's/\r$//' docker/install_auto_router.sh && chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh

 # Generate prisma client
 RUN prisma generate
-RUN chmod +x docker/entrypoint.sh
-RUN chmod +x docker/prod_entrypoint.sh
+# Convert Windows line endings to Unix for entrypoint scripts
+RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
+RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh

 EXPOSE 4000/tcp

--- a/deploy/Dockerfile.ghcr_base
+++ b/deploy/Dockerfile.ghcr_base
@ -8,7 +8,8 @@ WORKDIR /app
 COPY config.yaml .

 # Make sure your docker/entrypoint.sh is executable
-RUN chmod +x docker/entrypoint.sh
+# Convert Windows line endings to Unix
+RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh

 # Expose the necessary port
 EXPOSE 4000/tcp
--- a/docker/Dockerfile.alpine
+++ b/docker/Dockerfile.alpine
@ -46,8 +46,9 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels

-RUN chmod +x docker/entrypoint.sh
-RUN chmod +x docker/prod_entrypoint.sh
+# Convert Windows line endings to Unix for entrypoint scripts
+RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
+RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh

 EXPOSE 4000/tcp

--- a/docker/Dockerfile.custom_ui
+++ b/docker/Dockerfile.custom_ui
@ -32,8 +32,9 @@ RUN rm -rf /app/litellm/proxy/_experimental/out/* && \
 WORKDIR /app

 # Make sure your docker/entrypoint.sh is executable
-RUN chmod +x docker/entrypoint.sh
-RUN chmod +x docker/prod_entrypoint.sh
+# Convert Windows line endings to Unix for entrypoint scripts
+RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
+RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh

 # Expose the necessary port
 EXPOSE 4000/tcp
--- a/docker/Dockerfile.database
+++ b/docker/Dockerfile.database
@ -27,7 +27,8 @@ RUN python -m pip install build
 COPY . .

 # Build Admin UI
-RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
+# Convert Windows line endings to Unix and make executable
+RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh

 # Build the package
 RUN rm -rf dist/* && python -m build
@ -63,20 +64,23 @@ COPY --from=builder /wheels/ /wheels/
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels

 # Install semantic_router and aurelio-sdk using script
-RUN chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh
+# Convert Windows line endings to Unix and make executable
+RUN sed -i 's/\r$//' docker/install_auto_router.sh && chmod +x docker/install_auto_router.sh && ./docker/install_auto_router.sh

 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
 RUN pip install PyJWT==2.9.0 --no-cache-dir

-# Build Admin UI
-RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
+# Build Admin UI (runtime stage)
+# Convert Windows line endings to Unix and make executable
+RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh

 # Generate prisma client
 RUN prisma generate
-RUN chmod +x docker/entrypoint.sh
-RUN chmod +x docker/prod_entrypoint.sh
+# Convert Windows line endings to Unix for entrypoint scripts
+RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
+RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
 EXPOSE 4000/tcp

 RUN apk add --no-cache supervisor
--- a/docker/Dockerfile.dev
+++ b/docker/Dockerfile.dev
@ -40,7 +40,8 @@ COPY enterprise/ ./enterprise/
 COPY docker/ ./docker/

 # Build Admin UI once
-RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
+# Convert Windows line endings to Unix and make executable
+RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh

 # Build the package
 RUN rm -rf dist/* && python -m build
@ -79,8 +80,12 @@ RUN pip install --no-cache-dir *.whl /wheels/* --no-index --find-links=/wheels/
    rm -rf /wheels

 # Generate prisma client and set permissions
+# Convert Windows line endings to Unix for entrypoint scripts
 RUN prisma generate && \
-    chmod +x docker/entrypoint.sh docker/prod_entrypoint.sh
+    sed -i 's/\r$//' docker/entrypoint.sh && \
+    sed -i 's/\r$//' docker/prod_entrypoint.sh && \
+    chmod +x docker/entrypoint.sh && \
+    chmod +x docker/prod_entrypoint.sh

 EXPOSE 4000/tcp

--- a/docker/Dockerfile.non_root
+++ b/docker/Dockerfile.non_root
@ -144,7 +144,10 @@ RUN pip install --no-index --find-links=/wheels/ -r requirements.txt && \
    fi

 # Permissions, cleanup, and Prisma prep
-RUN chmod +x docker/entrypoint.sh docker/prod_entrypoint.sh && \
+# Convert Windows line endings to Unix for entrypoint scripts
+RUN sed -i 's/\r$//' docker/entrypoint.sh && \
+    sed -i 's/\r$//' docker/prod_entrypoint.sh && \
+    chmod +x docker/entrypoint.sh docker/prod_entrypoint.sh && \
    mkdir -p /nonexistent /.npm /var/lib/litellm/assets /var/lib/litellm/ui && \
    chown -R nobody:nogroup /app /var/lib/litellm/ui /var/lib/litellm/assets /nonexistent /.npm && \
    pip uninstall jwt -y || true && \
--- a/litellm/proxy/common_utils/performance_utils.md
+++ b/litellm/proxy/common_utils/performance_utils.md
@ -0,0 +1,214 @@
+# Performance Utilities Documentation
+
+This module provides performance monitoring and profiling functionality for LiteLLM proxy server using `cProfile` and `line_profiler`.
+
+## Table of Contents
+
+- [Line Profiler Usage](#line-profiler-usage)
+  - [Example 1: Wrapping a function directly](#example-1-wrapping-a-function-directly)
+  - [Example 2: Wrapping a module function dynamically](#example-2-wrapping-a-module-function-dynamically)
+  - [Example 3: Manual stats collection](#example-3-manual-stats-collection)
+  - [Example 4: Analyzing the profile output](#example-4-analyzing-the-profile-output)
+  - [Example 5: Using in a decorator pattern](#example-5-using-in-a-decorator-pattern)
+- [cProfile Usage](#cprofile-usage)
+- [Installation](#installation)
+- [Notes](#notes)
+- [API Reference](#api-reference)
+
+## Line Profiler Usage
+
+### Example 1: Wrapping a function directly
+
+This is how it's used in `litellm/utils.py` to profile `wrapper_async`:
+
+```python
+from litellm.proxy.common_utils.performance_utils import (
+    register_shutdown_handler,
+    wrap_function_directly,
+)
+
+def client(original_function):
+    @wraps(original_function)
+    async def wrapper_async(*args, **kwargs):
+        # ... function implementation ...
+        pass
+    
+    # Wrap the function with line_profiler
+    wrapper_async = wrap_function_directly(wrapper_async)
+    
+    # Register shutdown handler to collect stats on server shutdown
+    register_shutdown_handler(output_file="wrapper_async_line_profile.lprof")
+    
+    return wrapper_async
+```
+
+### Example 2: Wrapping a module function dynamically
+
+```python
+import my_module
+from litellm.proxy.common_utils.performance_utils import (
+    wrap_function_with_line_profiler,
+    register_shutdown_handler,
+)
+
+# Wrap a function in a module
+wrap_function_with_line_profiler(my_module, "expensive_function")
+
+# Register shutdown handler
+register_shutdown_handler(output_file="my_profile.lprof")
+
+# Now all calls to my_module.expensive_function will be profiled
+my_module.expensive_function()
+```
+
+### Example 3: Manual stats collection
+
+```python
+from litellm.proxy.common_utils.performance_utils import (
+    wrap_function_directly,
+    collect_line_profiler_stats,
+)
+
+def my_function():
+    # ... implementation ...
+    pass
+
+# Wrap the function
+my_function = wrap_function_directly(my_function)
+
+# Run your code
+my_function()
+
+# Collect stats manually (instead of waiting for shutdown)
+collect_line_profiler_stats(output_file="manual_profile.lprof")
+```
+
+### Example 4: Analyzing the profile output
+
+After running your code, analyze the `.lprof` file:
+
+```bash
+# View the profile
+python -m line_profiler wrapper_async_line_profile.lprof
+
+# Save to text file
+python -m line_profiler wrapper_async_line_profile.lprof > profile_report.txt
+```
+
+The output shows:
+- **Line #**: Line number in the source file
+- **Hits**: Number of times the line was executed
+- **Time**: Total time spent on that line (in microseconds)
+- **Per Hit**: Average time per execution
+- **% Time**: Percentage of total function time
+- **Line Contents**: The actual source code
+
+Example output:
+```
+Timer unit: 1e-06 s
+
+Total time: 3.73697 s
+File: litellm/utils.py
+Function: client.<locals>.wrapper_async at line 1657
+
+Line #      Hits         Time  Per Hit   % Time  Line Contents
+==============================================================
+  1657                                               @wraps(original_function)
+  1658                                               async def wrapper_async(*args, **kwargs):
+  1659      2005       7577.1      3.8      0.2          print_args_passed_to_litellm(...)
+  1763      2005    1351909.0    674.3    36.2          result = await original_function(*args, **kwargs)
+  1846      4010    1543688.1    385.0    41.3          update_response_metadata(...)
+```
+
+### Example 5: Using in a decorator pattern
+
+```python
+from litellm.proxy.common_utils.performance_utils import (
+    wrap_function_directly,
+    register_shutdown_handler,
+)
+
+def profile_decorator(func):
+    # Wrap the function
+    profiled_func = wrap_function_directly(func)
+    
+    # Register shutdown handler (only once)
+    if not hasattr(profile_decorator, '_registered'):
+        register_shutdown_handler(output_file="decorated_functions.lprof")
+        profile_decorator._registered = True
+    
+    return profiled_func
+
+@profile_decorator
+async def my_async_function():
+    # This function will be profiled
+    pass
+```
+
+## cProfile Usage
+
+### Example: Using the profile_endpoint decorator
+
+```python
+from litellm.proxy.common_utils.performance_utils import profile_endpoint
+
+@profile_endpoint(sampling_rate=0.1)  # Profile 10% of requests
+async def my_endpoint():
+    # ... implementation ...
+    pass
+```
+
+The `sampling_rate` parameter controls what percentage of requests are profiled:
+- `1.0`: Profile all requests (100%)
+- `0.1`: Profile 1 in 10 requests (10%)
+- `0.0`: Profile no requests (0%)
+
+## Installation
+
+`line_profiler` must be installed to use the line profiling functionality:
+
+```bash
+pip install line_profiler
+```
+
+On Windows with Python 3.14+, you may need to install Microsoft Visual C++ Build Tools to compile `line_profiler` from source.
+
+## Notes
+
+- The profiler aggregates stats by source code location, so multiple instances of the same function (e.g., closures) will be profiled together
+- Stats are automatically collected on server shutdown via `atexit` handler when using `register_shutdown_handler()`
+- You can also manually collect stats using `collect_line_profiler_stats()`
+- The line profiler will fail with an `ImportError` if `line_profiler` is not installed (as configured in `litellm/utils.py`)
+
+## API Reference
+
+### `wrap_function_directly(func: Callable) -> Callable`
+
+Wrap a function directly with line_profiler. This is the recommended way to profile functions, especially closures or functions created dynamically.
+
+**Raises:**
+- `ImportError`: If line_profiler is not available
+- `RuntimeError`: If line_profiler cannot be enabled or function cannot be wrapped
+
+### `wrap_function_with_line_profiler(module: Any, function_name: str) -> bool`
+
+Dynamically wrap a function in a module with line_profiler.
+
+**Returns:** `True` if wrapping was successful, `False` otherwise
+
+### `collect_line_profiler_stats(output_file: Optional[str] = None) -> None`
+
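+Collect and save line_profiler statistics. If `output_file` is provided, the stats are dumped to that file. Otherwise, the formatted stats are written to the proxy logger (`verbose_proxy_logger`) at INFO level rather than to a file.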
+
+### `register_shutdown_handler(output_file: Optional[str] = None) -> None`
+
+Register an `atexit` handler that will automatically save profiling statistics when the Python process exits. Safe to call multiple times (only registers once).
+
+**Default output file:** `line_profile_stats.lprof` if not specified
+
+### `profile_endpoint(sampling_rate: float = 1.0)`
+
+Decorator to sample endpoint hits and save to a profile file using cProfile.
+
+**Args:**
+- `sampling_rate`: Rate of requests to profile (0.0 to 1.0)
+
--- a/litellm/proxy/common_utils/performance_utils.py
+++ b/litellm/proxy/common_utils/performance_utils.py
@ -2,14 +2,19 @@
 Performance utilities for LiteLLM proxy server.

 This module provides performance monitoring and profiling functionality for endpoint
-performance analysis using cProfile with configurable sampling rates.
+performance analysis using cProfile with configurable sampling rates, and line_profiler
+for line-by-line profiling.
+
+See performance_utils.md for detailed usage examples and documentation.
 """

 import asyncio
+import atexit
 import cProfile
 import functools
 import threading
 from pathlib import Path as PathLib
+from typing import Any, Callable, Optional

 from litellm._logging import verbose_proxy_logger

@ -20,6 +25,11 @@ _last_profile_file_path = None
 _sample_counter = 0
 _sample_counter_lock = threading.Lock()

+# Global line_profiler state
+_line_profiler: Optional[Any] = None
+_line_profiler_lock = threading.Lock()
+_wrapped_functions: dict[str, Callable] = {}  # Store original functions
+

 def _should_sample(profile_sampling_rate: float) -> bool:
    """Determine if current request should be sampled based on sampling rate."""
@ -123,3 +133,154 @@ def profile_endpoint(sampling_rate: float = 1.0):
                    raise
            return sync_wrapper
    return decorator
+
+
+def enable_line_profiler() -> None:
+    """Create the shared LineProfiler instance if it does not exist yet.
+
+    The instance is stored in the module-level ``_line_profiler`` global and
+    is only created when that global is still ``None``, so repeated calls
+    reuse the same profiler.
+
+    Raises:
+        ImportError: If line_profiler is not available
+    """
+    global _line_profiler
+    # Imported inside the function so the package is only required when
+    # profiling is requested; a missing package surfaces here as ImportError.
+    from line_profiler import LineProfiler
+
+    with _line_profiler_lock:
+        if _line_profiler is not None:
+            return
+        _line_profiler = LineProfiler()
+        verbose_proxy_logger.info("Line profiler enabled")
+
+
+def wrap_function_with_line_profiler(module: Any, function_name: str) -> bool:
+    """Dynamically wrap a function in a module with line_profiler.
+
+    Looks up ``function_name`` on ``module``, wraps it with the shared
+    LineProfiler instance and rebinds the attribute on the module, so later
+    lookups through the module resolve to the profiled wrapper.
+
+    Args:
+        module: The module containing the function
+        function_name: Name of the function to wrap
+
+    Returns:
+        True if wrapping was successful, False otherwise (line_profiler is
+        not installed, the attribute is missing, or wrapping raised)
+    """
+    # enable_line_profiler() returns None and reports failure by raising
+    # ImportError, so its return value must not be used as a success flag.
+    try:
+        enable_line_profiler()
+    except ImportError as e:
+        verbose_proxy_logger.warning(
+            f"line_profiler is not available, cannot wrap {function_name}: {e}"
+        )
+        return False
+
+    if _line_profiler is None:
+        return False
+
+    try:
+        original_function = getattr(module, function_name, None)
+        if original_function is None:
+            verbose_proxy_logger.warning(
+                f"Function {function_name} not found in module {module.__name__}"
+            )
+            return False
+
+        # Keep the first object seen under this name so the unprofiled
+        # function stays reachable after the module attribute is rebound.
+        if function_name not in _wrapped_functions:
+            _wrapped_functions[function_name] = original_function
+
+        # Calling the profiler on a function returns the profiled wrapper.
+        profiled_function = _line_profiler(original_function)
+        setattr(module, function_name, profiled_function)
+
+        verbose_proxy_logger.info(
+            f"Wrapped {module.__name__}.{function_name} with line_profiler"
+        )
+        return True
+    except Exception as e:
+        # Best-effort API: report the failure through the return value.
+        verbose_proxy_logger.error(
+            f"Error wrapping {function_name} with line_profiler: {e}"
+        )
+        return False
+
+
+def wrap_function_directly(func: Callable) -> Callable:
+    """Return a line_profiler-instrumented version of ``func``.
+
+    This is the recommended way to profile functions, especially closures or
+    functions created dynamically (like wrapper_async in litellm/utils.py).
+
+    Args:
+        func: The function to wrap
+
+    Returns:
+        The wrapped function that will be profiled when called
+
+    Raises:
+        ImportError: If line_profiler is not available
+        RuntimeError: If the shared profiler is still unset after enabling
+    """
+    import warnings
+
+    # Creates the shared profiler on first use; propagates ImportError when
+    # the line_profiler package is missing.
+    enable_line_profiler()
+
+    profiler = _line_profiler
+    if profiler is None:
+        raise RuntimeError("Line profiler was not initialized")
+
+    # The __wrapped__ UserWarning is silenced on purpose: the wrapper itself
+    # is what we want to profile.
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore", message=".*__wrapped__.*", category=UserWarning
+        )
+        # Register the function with the profiler, then build the wrapper.
+        profiler.add_function(func)
+        wrapped = profiler(func)
+
+    verbose_proxy_logger.info(
+        f"Wrapped function {func.__name__} with line_profiler"
+    )
+    return wrapped
+
+
+def collect_line_profiler_stats(output_file: Optional[str] = None) -> None:
+    """Collect line_profiler statistics and save or log them.
+
+    This can be called manually to collect stats at any time, or it's
+    automatically called on shutdown if register_shutdown_handler() was used.
+    When the profiler was never enabled this only emits a debug log. Errors
+    raised while dumping or formatting the stats are logged, not propagated.
+
+    Args:
+        output_file: Optional path to save stats. If None (or empty), the
+            formatted stats are emitted through verbose_proxy_logger at INFO
+            level instead of being written to a file.
+    """
+    # No ``global`` statement needed: the profiler is only read here.
+    with _line_profiler_lock:
+        if _line_profiler is None:
+            verbose_proxy_logger.debug("Line profiler not enabled, nothing to collect")
+            return
+
+        try:
+            if output_file:
+                # Save to file
+                output_path = PathLib(output_file)
+                _line_profiler.dump_stats(str(output_path))
+                verbose_proxy_logger.info(
+                    f"Line profiler stats saved to {output_path}"
+                )
+            else:
+                # Render the report into an in-memory buffer so it can be
+                # sent through the logger rather than written to stdout.
+                from io import StringIO
+
+                stream = StringIO()
+                _line_profiler.print_stats(stream=stream)
+                stats_output = stream.getvalue()
+                verbose_proxy_logger.info("Line profiler stats:\n" + stats_output)
+        except Exception as e:
+            verbose_proxy_logger.error(f"Error collecting line profiler stats: {e}")
+
+
+# Output files that already have an atexit handler registered through
+# register_shutdown_handler(); consulted to keep repeated calls idempotent.
+_registered_shutdown_outputs: set = set()
+
+
+def register_shutdown_handler(output_file: Optional[str] = None) -> None:
+    """Register a shutdown handler to collect line_profiler stats.
+
+    This registers an atexit handler that will automatically save profiling
+    statistics when the Python process exits. Safe to call multiple times:
+    a handler is registered only once per distinct output file.
+
+    Args:
+        output_file: Optional path to save stats on shutdown.
+                     Defaults to 'line_profile_stats.lprof'
+    """
+    if output_file is None:
+        output_file = "line_profile_stats.lprof"
+
+    # atexit.register() does not de-duplicate, and each call here creates a
+    # new closure. Without this guard, every call would add another callback
+    # that dumps to the same file at exit.
+    if output_file in _registered_shutdown_outputs:
+        return
+    _registered_shutdown_outputs.add(output_file)
+
+    def shutdown_handler():
+        collect_line_profiler_stats(output_file=output_file)
+
+    atexit.register(shutdown_handler)
+    verbose_proxy_logger.debug(f"Registered line_profiler shutdown handler for {output_file}")