diff --git a/litellm/caching/qdrant_semantic_cache.py b/litellm/caching/qdrant_semantic_cache.py
index 9206f326ec..cb521efca0 100644
--- a/litellm/caching/qdrant_semantic_cache.py
+++ b/litellm/caching/qdrant_semantic_cache.py
@@ -12,7 +12,7 @@ import ast
 import asyncio
 import json
 import os
-from typing import Any, Dict, Optional, cast
+from typing import Any, Dict, cast
 
 import litellm
 from litellm._logging import print_verbose
@@ -27,9 +27,6 @@ from .base_cache import BaseCache
 
 class QdrantSemanticCache(BaseCache):
     CACHE_KEY_FIELD_NAME = "litellm_cache_key"
-    ALLOW_LEGACY_UNSCOPED_HITS_ENV_VAR = (
-        "LITELLM_SEMANTIC_CACHE_ALLOW_LEGACY_UNSCOPED_HITS"
-    )
 
     def __init__(  # noqa: PLR0915
         self,
@@ -41,7 +38,6 @@ class QdrantSemanticCache(BaseCache):
         embedding_model="text-embedding-ada-002",
         host_type=None,
         vector_size=None,
-        allow_legacy_unscoped_cache_hits: Optional[bool] = None,
     ):
         from litellm.llms.custom_httpx.http_handler import (
             _get_httpx_client,
@@ -62,16 +58,6 @@ class QdrantSemanticCache(BaseCache):
             raise Exception("similarity_threshold must be provided, passed None")
         self.similarity_threshold = similarity_threshold
         self.embedding_model = embedding_model
-        self.allow_legacy_unscoped_cache_hits = (
-            self._get_allow_legacy_unscoped_cache_hits(allow_legacy_unscoped_cache_hits)
-        )
-        if self.allow_legacy_unscoped_cache_hits:
-            print_verbose(
-                "Qdrant semantic-cache legacy unscoped hits are enabled via "
-                f"{self.ALLOW_LEGACY_UNSCOPED_HITS_ENV_VAR}; searches may return "
-                "pre-isolation cache entries without cache-key payloads. Disable "
-                "this after warming the key-scoped semantic cache."
-            )
         self.vector_size = (
             vector_size if vector_size is not None else QDRANT_VECTOR_SIZE
         )
@@ -180,14 +166,6 @@ class QdrantSemanticCache(BaseCache):
             else:
                 raise Exception("Error while creating new collection")
 
-    @classmethod
-    def _get_allow_legacy_unscoped_cache_hits(
-        cls, allow_legacy_unscoped_cache_hits: Optional[bool]
-    ) -> bool:
-        if allow_legacy_unscoped_cache_hits is not None:
-            return allow_legacy_unscoped_cache_hits
-        return os.getenv(cls.ALLOW_LEGACY_UNSCOPED_HITS_ENV_VAR, "").lower() == "true"
-
     def _get_cache_logic(self, cached_response: Any):
         if cached_response is None:
             return cached_response
@@ -210,8 +188,6 @@ class QdrantSemanticCache(BaseCache):
         }
 
     def _add_cache_key_filter_to_search_data(self, data: dict, key: str) -> None:
-        if getattr(self, "allow_legacy_unscoped_cache_hits", False):
-            return
         data["filter"] = self._get_qdrant_cache_key_filter(key)
 
     def _ensure_cache_key_payload_index(self) -> None:
@@ -236,14 +212,11 @@ class QdrantSemanticCache(BaseCache):
             )
 
     def _payload_matches_cache_key(self, payload: dict, key: str) -> bool:
-        # Legacy Qdrant semantic-cache points stored only prompt text and
-        # response. They cannot be reassigned to the generated LiteLLM cache key
-        # without risking cross-scope hits, so secure mode treats them as misses.
+        # Pre-isolation points stored only prompt text and response, with no
+        # cache-key payload field. Assigning them to a caller's key would risk
+        # cross-scope hits, so they are treated as misses and the cache is
+        # re-populated on the next set_cache.
         cached_key = payload.get(self.CACHE_KEY_FIELD_NAME)
-        if cached_key is None and getattr(
-            self, "allow_legacy_unscoped_cache_hits", False
-        ):
-            return True
         return cached_key is not None and str(cached_key) == str(key)
 
     async def _get_async_embedding(self, prompt: str, **kwargs) -> Any:
diff --git a/litellm/caching/redis_semantic_cache.py b/litellm/caching/redis_semantic_cache.py
index 35fed977cc..da9e7b1e58 100644
--- a/litellm/caching/redis_semantic_cache.py
+++ b/litellm/caching/redis_semantic_cache.py
@@ -36,9 +36,6 @@ class RedisSemanticCache(BaseCache):
 
     DEFAULT_REDIS_INDEX_NAME: str = "litellm_semantic_cache_index"
     CACHE_KEY_FIELD_NAME: str = "litellm_cache_key"
-    ALLOW_LEGACY_UNSCOPED_HITS_ENV_VAR: str = (
-        "LITELLM_SEMANTIC_CACHE_ALLOW_LEGACY_UNSCOPED_HITS"
-    )
 
     def __init__(
         self,
@@ -49,7 +46,6 @@ class RedisSemanticCache(BaseCache):
         similarity_threshold: Optional[float] = None,
         embedding_model: str = "text-embedding-ada-002",
         index_name: Optional[str] = None,
-        allow_legacy_unscoped_cache_hits: Optional[bool] = None,
         **kwargs,
     ):
         """
@@ -91,10 +87,6 @@ class RedisSemanticCache(BaseCache):
         # While similarity: 1 = most similar, 0 = least similar
         self.distance_threshold = 1 - similarity_threshold
         self.embedding_model = embedding_model
-        self.allow_legacy_unscoped_cache_hits = (
-            self._get_allow_legacy_unscoped_cache_hits(allow_legacy_unscoped_cache_hits)
-        )
-        self._using_legacy_unscoped_index = False
 
         # Set up Redis connection
         if redis_url is None:
@@ -125,14 +117,6 @@ class RedisSemanticCache(BaseCache):
             cache_vectorizer=cache_vectorizer,
         )
 
-    @classmethod
-    def _get_allow_legacy_unscoped_cache_hits(
-        cls, allow_legacy_unscoped_cache_hits: Optional[bool]
-    ) -> bool:
-        if allow_legacy_unscoped_cache_hits is not None:
-            return allow_legacy_unscoped_cache_hits
-        return os.getenv(cls.ALLOW_LEGACY_UNSCOPED_HITS_ENV_VAR, "").lower() == "true"
-
     @classmethod
     def _cache_key_filterable_field(cls) -> Dict[str, str]:
         return {
@@ -167,22 +151,6 @@ class RedisSemanticCache(BaseCache):
             if not _is_schema_mismatch(exc):
                 raise
 
-            if self.allow_legacy_unscoped_cache_hits:
-                self._using_legacy_unscoped_index = True
-                print_verbose(
-                    "Redis semantic-cache legacy unscoped hits are enabled via "
-                    f"{self.ALLOW_LEGACY_UNSCOPED_HITS_ENV_VAR}; reusing existing "
-                    "index without cache-key isolation. Disable this after warming "
-                    "the isolated semantic cache."
-                )
-                return semantic_cache_cls(
-                    name=index_name,
-                    redis_url=redis_url,
-                    vectorizer=cache_vectorizer,
-                    distance_threshold=self.distance_threshold,
-                    overwrite=False,
-                )
-
             isolated_index_name = f"{index_name}_isolated"
             print_verbose(
                 "Redis semantic-cache existing index schema is not isolated; "
@@ -223,11 +191,11 @@ class RedisSemanticCache(BaseCache):
         return Tag(self.CACHE_KEY_FIELD_NAME) == str(key)
 
     def _cache_hit_matches_key(self, cache_hit: Dict[str, Any], key: str) -> bool:
+        # Pre-isolation entries with no ``litellm_cache_key`` field cannot be
+        # safely reassigned to a caller's scope and are treated as misses.
         cached_key = cache_hit.get(self.CACHE_KEY_FIELD_NAME)
         if isinstance(cached_key, bytes):
             cached_key = cached_key.decode("utf-8")
-        if cached_key is None and getattr(self, "_using_legacy_unscoped_index", False):
-            return True
         return cached_key is not None and str(cached_key) == str(key)
 
     def _get_ttl(self, **kwargs) -> Optional[int]:
@@ -319,9 +287,9 @@ class RedisSemanticCache(BaseCache):
             prompt = get_str_from_messages(messages)
             value_str = str(value)
 
-            store_kwargs: Dict[str, Any] = {}
-            if not getattr(self, "_using_legacy_unscoped_index", False):
-                store_kwargs["filters"] = self._get_cache_filters(key)
+            store_kwargs: Dict[str, Any] = {
+                "filters": self._get_cache_filters(key),
+            }
 
             # Get TTL and store in Redis semantic cache
             ttl = self._get_ttl(**kwargs)
@@ -357,11 +325,10 @@ class RedisSemanticCache(BaseCache):
             prompt = get_str_from_messages(messages)
             # Check the cache for semantically similar prompts in this exact
             # LiteLLM cache-key scope.
-            check_kwargs: Dict[str, Any] = {"prompt": prompt}
-            if not getattr(self, "_using_legacy_unscoped_index", False):
-                check_kwargs["filter_expression"] = (
-                    self._get_cache_key_filter_expression(key)
-                )
+            check_kwargs: Dict[str, Any] = {
+                "prompt": prompt,
+                "filter_expression": self._get_cache_key_filter_expression(key),
+            }
             results = self.llmcache.check(**check_kwargs)
 
             # Return None if no similar prompts found
@@ -475,9 +442,8 @@ class RedisSemanticCache(BaseCache):
 
             store_kwargs: Dict[str, Any] = {
                 "vector": prompt_embedding,
+                "filters": self._get_cache_filters(key),
             }
-            if not getattr(self, "_using_legacy_unscoped_index", False):
-                store_kwargs["filters"] = self._get_cache_filters(key)
 
             # Get TTL and store in Redis semantic cache
             ttl = self._get_ttl(**kwargs)
@@ -522,11 +488,8 @@ class RedisSemanticCache(BaseCache):
             check_kwargs: Dict[str, Any] = {
                 "prompt": prompt,
                 "vector": prompt_embedding,
+                "filter_expression": self._get_cache_key_filter_expression(key),
             }
-            if not getattr(self, "_using_legacy_unscoped_index", False):
-                check_kwargs["filter_expression"] = (
-                    self._get_cache_key_filter_expression(key)
-                )
             results = await self.llmcache.acheck(**check_kwargs)
 
             # handle results / cache hit
diff --git a/tests/test_litellm/caching/test_qdrant_semantic_cache.py b/tests/test_litellm/caching/test_qdrant_semantic_cache.py
index 9b987a6d4f..949e6ccc29 100644
--- a/tests/test_litellm/caching/test_qdrant_semantic_cache.py
+++ b/tests/test_litellm/caching/test_qdrant_semantic_cache.py
@@ -208,76 +208,6 @@ def test_qdrant_semantic_cache_rejects_unscoped_cache_hit():
         assert metadata["semantic-similarity"] == 0.0
 
 
-def test_qdrant_semantic_cache_allows_legacy_unscoped_hit_with_flag(monkeypatch):
-    monkeypatch.setenv("LITELLM_SEMANTIC_CACHE_ALLOW_LEGACY_UNSCOPED_HITS", "true")
-
-    with (
-        patch(
-            "litellm.llms.custom_httpx.http_handler._get_httpx_client"
-        ) as mock_sync_client,
-        patch("litellm.llms.custom_httpx.http_handler.get_async_httpx_client"),
-    ):
-
-        mock_response = MagicMock()
-        mock_response.status_code = 200
-        mock_response.json.return_value = {"result": {"exists": True}}
-
-        mock_sync_client_instance = MagicMock()
-        mock_sync_client_instance.get.return_value = mock_response
-        mock_sync_client.return_value = mock_sync_client_instance
-
-        from litellm.caching.qdrant_semantic_cache import QdrantSemanticCache
-
-        qdrant_cache = QdrantSemanticCache(
-            collection_name="test_collection",
-            qdrant_api_base="http://test.qdrant.local",
-            qdrant_api_key="test_key",
-            similarity_threshold=0.8,
-        )
-
-        mock_search_response = MagicMock()
-        mock_search_response.status_code = 200
-        mock_search_response.json.return_value = {
-            "result": [
-                {
-                    "payload": {
-                        "text": "What is the capital of France?",
-                        "response": '{"id": "test-123"}',
-                    },
-                    "score": 0.9,
-                }
-            ]
-        }
-        qdrant_cache.sync_client.post = MagicMock(return_value=mock_search_response)
-
-        with patch(
-            "litellm.embedding", return_value={"data": [{"embedding": [0.1, 0.2, 0.3]}]}
-        ):
-            metadata = {}
-            result = qdrant_cache.get_cache(
-                key="test_key",
-                messages=[{"content": "What is the capital of France?"}],
-                metadata=metadata,
-            )
-
-        assert result == {"id": "test-123"}
-        assert metadata["semantic-similarity"] == 0.9
-        assert "filter" not in qdrant_cache.sync_client.post.call_args.kwargs["json"]
-
-
-def test_qdrant_semantic_cache_legacy_mode_rejects_wrong_key_hit():
-    from litellm.caching.qdrant_semantic_cache import QdrantSemanticCache
-
-    qdrant_cache = QdrantSemanticCache.__new__(QdrantSemanticCache)
-    qdrant_cache.allow_legacy_unscoped_cache_hits = True
-
-    assert qdrant_cache._payload_matches_cache_key(payload={}, key="test_key")
-    assert not qdrant_cache._payload_matches_cache_key(
-        payload={QdrantSemanticCache.CACHE_KEY_FIELD_NAME: "other_key"},
-        key="test_key",
-    )
-
-
 def test_qdrant_semantic_cache_payload_index_failure_is_non_blocking():
     from litellm.caching.qdrant_semantic_cache import QdrantSemanticCache
 
diff --git a/tests/test_litellm/caching/test_redis_semantic_cache.py b/tests/test_litellm/caching/test_redis_semantic_cache.py
index bebe1f757b..b50a35ef50 100644
--- a/tests/test_litellm/caching/test_redis_semantic_cache.py
+++ b/tests/test_litellm/caching/test_redis_semantic_cache.py
@@ -232,46 +232,6 @@ def test_redis_semantic_cache_uses_isolated_index_for_old_schema(monkeypatch):
         ]
 
 
-def test_redis_semantic_cache_can_reuse_legacy_unscoped_index(monkeypatch):
-    fallback_cache_mock = MagicMock()
-    semantic_cache_mock = MagicMock(
-        side_effect=[
-            ValueError("Existing index schema does not match"),
-            fallback_cache_mock,
-        ]
-    )
-    custom_vectorizer_mock = MagicMock()
-
-    with patch.dict(
-        "sys.modules",
-        {
-            "redisvl.extensions.llmcache": MagicMock(SemanticCache=semantic_cache_mock),
-            "redisvl.utils.vectorize": MagicMock(
-                CustomTextVectorizer=custom_vectorizer_mock
-            ),
-        },
-    ):
-        from litellm.caching.redis_semantic_cache import RedisSemanticCache
-
-        monkeypatch.setenv("REDIS_HOST", "localhost")
-        monkeypatch.setenv("REDIS_PORT", "6379")
-        monkeypatch.setenv("REDIS_PASSWORD", "test_password")
-        monkeypatch.setenv(
-            RedisSemanticCache.ALLOW_LEGACY_UNSCOPED_HITS_ENV_VAR, "true"
-        )
-
-        redis_semantic_cache = RedisSemanticCache(
-            similarity_threshold=0.8,
-            index_name="existing_index",
-        )
-
-        assert redis_semantic_cache.llmcache is fallback_cache_mock
-        assert redis_semantic_cache._using_legacy_unscoped_index is True
-        assert semantic_cache_mock.call_count == 2
-        assert semantic_cache_mock.call_args_list[1].kwargs["name"] == "existing_index"
-        assert "filterable_fields" not in semantic_cache_mock.call_args_list[1].kwargs
-
-
 def test_redis_semantic_cache_overwrites_stale_isolated_index(monkeypatch):
     fallback_cache_mock = MagicMock()
     semantic_cache_mock = MagicMock(
@@ -372,11 +332,12 @@ def test_redis_semantic_cache_matches_bytes_cache_key():
     )
 
 
-def test_redis_semantic_cache_allows_unscoped_hit_only_in_legacy_mode():
+def test_redis_semantic_cache_rejects_pre_isolation_unscoped_hit():
+    """Pre-isolation entries with no cache-key field cannot be safely
+    reassigned to a caller's scope and are treated as misses."""
     from litellm.caching.redis_semantic_cache import RedisSemanticCache
 
     redis_semantic_cache = RedisSemanticCache.__new__(RedisSemanticCache)
-    redis_semantic_cache._using_legacy_unscoped_index = False
 
     cache_hit = {
         "prompt": "What is the capital of France?",
@@ -388,12 +349,6 @@ def test_redis_semantic_cache_allows_unscoped_hit_only_in_legacy_mode():
         key="test_key",
     )
 
-    redis_semantic_cache._using_legacy_unscoped_index = True
-    assert redis_semantic_cache._cache_hit_matches_key(
-        cache_hit=cache_hit,
-        key="test_key",
-    )
-
 
 def test_redis_semantic_cache_builds_filter_expression(monkeypatch):
     class FakeTag: