perf(spendlogs): optimize old spendlog deletion cron job

This commit is contained in:
Harshit Jain 2026-02-23 19:44:30 +05:30
parent f97ee62fb0
commit c6f60bed71
No known key found for this signature in database
GPG Key ID: 36C392CD4415B4CF
4 changed files with 32 additions and 47 deletions

View File

@ -500,6 +500,7 @@ model LiteLLM_SpendLogs {
agent_id String?
proxy_server_request Json? @default("{}")
@@index([startTime])
@@index([startTime, request_id])
@@index([end_user])
@@index([session_id])
}

View File

@ -76,27 +76,29 @@ class SpendLogCleanup:
"Max logs deleted - 1,00,000, rest of the logs will be deleted in next run"
)
break
# Step 1: Find logs to delete
logs_to_delete = await prisma_client.db.litellm_spendlogs.find_many(
where={"startTime": {"lt": cutoff_date}},
take=self.batch_size,
# Step 1: Find and delete logs in a single statement, without fetching them into the application
# Delete in batches, limited by self.batch_size
deleted_count = await prisma_client.db.execute_raw(
"""
DELETE FROM "LiteLLM_SpendLogs"
WHERE "request_id" IN (
SELECT "request_id" FROM "LiteLLM_SpendLogs"
WHERE "startTime" < $1::timestamptz
LIMIT $2
)
""",
cutoff_date,
self.batch_size,
)
verbose_proxy_logger.info(f"Found {len(logs_to_delete)} logs in this batch")
verbose_proxy_logger.info(f"Deleted {deleted_count} logs in this batch")
if not logs_to_delete:
if deleted_count == 0:
verbose_proxy_logger.info(
f"No more logs to delete. Total deleted: {total_deleted}"
)
break
request_ids = [log.request_id for log in logs_to_delete]
# Step 2: Delete them in one go
await prisma_client.db.litellm_spendlogs.delete_many(
where={"request_id": {"in": request_ids}}
)
total_deleted += len(logs_to_delete)
total_deleted += deleted_count
run_count += 1
# Add a small sleep to prevent overwhelming the database

View File

@ -499,6 +499,7 @@ model LiteLLM_SpendLogs {
agent_id String?
proxy_server_request Json? @default("{}")
@@index([startTime])
@@index([startTime, request_id])
@@index([end_user])
@@index([session_id])
}

View File

@ -151,28 +151,16 @@ async def test_should_delete_spend_logs():
@pytest.mark.asyncio
async def test_cleanup_old_spend_logs_batch_deletion():
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import AsyncMock, MagicMock
# Setup Prisma client
mock_prisma_client = MagicMock()
mock_db = MagicMock()
# Mock spendlogs table
mock_spendlogs = MagicMock()
mock_spendlogs.find_many = AsyncMock()
mock_spendlogs.delete_many = AsyncMock()
# Create 1500 mocked logs with .request_id
mock_logs = [SimpleNamespace(request_id=f"req_{i}") for i in range(1500)]
mock_spendlogs.find_many.side_effect = [
mock_logs[:1000], # Batch 1
mock_logs[1000:], # Batch 2
[], # Done
]
# Mock execute_raw to return deleted counts
mock_db.execute_raw = AsyncMock(side_effect=[1000, 500, 0])
# Wire up mocks
mock_db.litellm_spendlogs = mock_spendlogs
mock_prisma_client.db = mock_db
# Mock Redis cache and pod_lock_manager
@ -189,15 +177,13 @@ async def test_cleanup_old_spend_logs_batch_deletion():
assert cleaner._should_delete_spend_logs() is True
await cleaner.cleanup_old_spend_logs(mock_prisma_client)
# Validate batching and deletion
assert mock_spendlogs.find_many.call_count == 3
assert mock_spendlogs.delete_many.call_count == 2
mock_spendlogs.delete_many.assert_any_call(
where={"request_id": {"in": [f"req_{i}" for i in range(1000)]}}
)
mock_spendlogs.delete_many.assert_any_call(
where={"request_id": {"in": [f"req_{i}" for i in range(1000, 1500)]}}
)
# Validate batching and deletion via raw SQL
assert mock_db.execute_raw.call_count == 3
# Check the first call argument
call_args_sql = mock_db.execute_raw.call_args_list[0][0][0]
assert 'DELETE FROM "LiteLLM_SpendLogs"' in call_args_sql
assert 'WHERE "request_id" IN' in call_args_sql
@pytest.mark.asyncio
@ -208,10 +194,7 @@ async def test_cleanup_old_spend_logs_retention_period_cutoff():
# Setup Prisma client
mock_prisma_client = MagicMock()
mock_db = MagicMock()
mock_spendlogs = MagicMock()
mock_spendlogs.find_many = AsyncMock(return_value=[])
mock_spendlogs.delete_many = AsyncMock()
mock_db.litellm_spendlogs = mock_spendlogs
mock_db.execute_raw = AsyncMock(return_value=0)
mock_prisma_client.db = mock_db
# Mock Redis cache and pod_lock_manager
@ -229,7 +212,7 @@ async def test_cleanup_old_spend_logs_retention_period_cutoff():
await cleaner.cleanup_old_spend_logs(mock_prisma_client)
# Verify the cutoff date is correct
cutoff_date = mock_spendlogs.find_many.call_args[1]["where"]["startTime"]["lt"]
cutoff_date = mock_db.execute_raw.call_args[0][1]
expected_cutoff = datetime.now(timezone.utc) - timedelta(seconds=86400)
assert (
abs((cutoff_date - expected_cutoff).total_seconds()) < 1
@ -242,14 +225,12 @@ async def test_cleanup_old_spend_logs_no_retention_period():
Test that no logs are deleted when no retention period is set
"""
mock_prisma_client = MagicMock()
mock_prisma_client.db.litellm_spendlogs.find_many = AsyncMock()
mock_prisma_client.db.litellm_spendlogs.delete = AsyncMock()
mock_prisma_client.db.execute_raw = AsyncMock()
cleaner = SpendLogCleanup(general_settings={}) # no retention
await cleaner.cleanup_old_spend_logs(mock_prisma_client)
mock_prisma_client.db.litellm_spendlogs.find_many.assert_not_called()
mock_prisma_client.db.litellm_spendlogs.delete.assert_not_called()
mock_prisma_client.db.execute_raw.assert_not_called()
def test_cleanup_batch_size_env_var(monkeypatch):