-- ================================================
-- init.sql - Stable RAG schema with hybrid search
-- For pgvector ≥ 0.5, BGE-M3 embeddings (1024 dims),
-- and pg_jieba full-text search over Chinese + English text
-- ================================================

-- Session settings for the statements below:
--   lock_timeout '5s'     -> give up on any statement that waits more than
--                            five seconds to acquire a lock
--   statement_timeout '0' -> disable the statement timeout (presumably so the
--                            index builds later in this script are not cut short)
SET lock_timeout TO '5s';
SET statement_timeout TO '0';

-- 1. Required extensions: vector search + Chinese word segmentation
--    vector   : provides the VECTOR column type and the hnsw index method used below
--    pg_jieba : provides the jieba text search parser used for full-text search
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_jieba;

-- 2. Full-text search configuration for mixed Chinese + English content.
--    Custom configuration name: jieba_search
--
--    The pg_jieba extension registers its parser under the name "jieba" (there
--    is no parser called "pg_jieba"), and its token types are jieba
--    part-of-speech tags rather than word/tag/symbol/number. Copying the
--    extension-provided "jiebacfg" configuration inherits a valid parser
--    together with a complete set of token-type mappings, instead of listing
--    token types by hand.
DO $$
BEGIN
    -- CREATE TEXT SEARCH CONFIGURATION has no IF NOT EXISTS form, so guard it
    -- with a catalog lookup to keep this script re-runnable.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_catalog.pg_ts_config
        WHERE cfgname = 'jieba_search'
    ) THEN
        CREATE TEXT SEARCH CONFIGURATION jieba_search (COPY = jiebacfg);
    END IF;
END
$$;

-- 3. Main table: one row per (repo, path, chunk_id) chunk of text.
CREATE TABLE IF NOT EXISTS public.documents (
    id          BIGSERIAL    PRIMARY KEY,
    repo        TEXT         NOT NULL,
    path        TEXT         NOT NULL,
    chunk_id    INT          NOT NULL,
    content     TEXT         NOT NULL,
    embedding   VECTOR(1024),            -- nullable; 1024 dimensions
    metadata    JSONB,
    content_sha TEXT         NOT NULL,   -- presumably a hash of content; verify against the writer

    -- Full-text search vector (pg_jieba), derived from content with weight 'A'.
    content_tsv tsvector GENERATED ALWAYS AS (
        setweight(to_tsvector('jieba_search', coalesce(content, '')), 'A')
    ) STORED,

    -- Derived key "<repo>:<path>:<chunk_id>".
    -- chunk_id is cast explicitly: the implicit text || integer form resolves
    -- to a function marked STABLE, which some PostgreSQL releases reject in a
    -- generated column ("generation expression is not immutable"). The
    -- resulting value is the same either way.
    doc_key TEXT GENERATED ALWAYS AS (
        repo || ':' || path || ':' || chunk_id::text
    ) STORED,

    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- Only defaulted at insert time; nothing in this script refreshes it on UPDATE.
    updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- 4. Uniqueness: at most one row per doc_key (repo:path:chunk_id).
CREATE UNIQUE INDEX IF NOT EXISTS documents_doc_key_uk
    ON public.documents USING btree (doc_key);

-- 5. Vector index: HNSW over cosine distance.
--    Intentionally NOT a partial index. pgvector does not index NULL vectors,
--    so a "WHERE embedding IS NOT NULL" predicate saves nothing, while the
--    planner only considers a partial index when the query's WHERE clause
--    implies the predicate. Without the predicate, a plain
--    "ORDER BY embedding <=> $1 LIMIT k" query can use this index directly.
CREATE INDEX IF NOT EXISTS documents_embedding_idx
    ON public.documents
    USING hnsw (embedding vector_cosine_ops);

-- 6. Full-text index (pg_jieba): GIN over the generated content_tsv column.
CREATE INDEX IF NOT EXISTS idx_documents_tsv
    ON public.documents
    USING gin (content_tsv);

-- 7. Composite index on (repo, path), for lookups by repo or by repo + path.
CREATE INDEX IF NOT EXISTS idx_documents_repo_path
    ON public.documents USING btree (repo, path);