accounts/rag-server/sql/schema.sql

66 lines
2.1 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- ================================================
-- init.sql - Stable RAG schema with Hybrid Search
-- For pgvector ≥ 0.5, BGE-M3 (1024 dims), pg_jieba + english
-- ================================================
SET lock_timeout = '5s';
SET statement_timeout = '0';
-- 1. 必要扩展:向量 + 中文分词
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_jieba;
-- 2. 中文 + 英文混合全文检索配置pg_jieba + simple
-- 自定义配置名jieba_search
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'jieba_search') THEN
CREATE TEXT SEARCH CONFIGURATION jieba_search (PARSER = pg_jieba);
-- pg_jieba 的 token 类型包括word, tag, symbol, number
ALTER TEXT SEARCH CONFIGURATION jieba_search
ADD MAPPING FOR word, tag, symbol, number WITH simple;
END IF;
END$$;
-- 3. 主表结构
CREATE TABLE IF NOT EXISTS public.documents (
id BIGSERIAL PRIMARY KEY,
repo TEXT NOT NULL,
path TEXT NOT NULL,
chunk_id INT NOT NULL,
content TEXT NOT NULL,
embedding VECTOR(1024),
metadata JSONB,
content_sha TEXT NOT NULL,
-- 全文检索字段pg_jieba
content_tsv tsvector GENERATED ALWAYS AS (
setweight(to_tsvector('jieba_search', coalesce(content, '')), 'A')
) STORED,
doc_key TEXT GENERATED ALWAYS AS (
repo || ':' || path || ':' || chunk_id
) STORED,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- 4. 唯一约束
CREATE UNIQUE INDEX IF NOT EXISTS documents_doc_key_uk
ON public.documents (doc_key);
-- 5. 向量索引
CREATE INDEX IF NOT EXISTS documents_embedding_idx
ON public.documents
USING hnsw (embedding vector_cosine_ops)
WHERE embedding IS NOT NULL;
-- 6. 全文索引pg_jieba
CREATE INDEX IF NOT EXISTS idx_documents_tsv
ON public.documents USING gin (content_tsv);
-- 7. 复合索引
CREATE INDEX IF NOT EXISTS idx_documents_repo_path
ON public.documents (repo, path);