PostgreSQL vector database skill with pgvector extension. Enables vector similarity search, embeddings storage, RAG (Retrieval-Augmented Generation) pipelines, and hybrid search combining vector and keyword search. Use when: storing/retrieving embeddings, building AI applications with vector search, implementing RAG, similarity matching, semantic search, or any use case requiring vector database functionality.
PostgreSQL + pgvector 扩展,用于向量相似性搜索。
bash
sql
-- 基础向量表(OpenAI 嵌入使用 1536 维)
CREATE TABLE IF NOT EXISTS documents (
id BIGSERIAL PRIMARY KEY,
content TEXT NOT NULL,
embedding vector(1536) NOT NULL,
metadata JSONB,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- 创建 HNSW 索引以实现快速相似性搜索
CREATE INDEX ON documents USING hnsw (embedding vectorcosineops)
WITH (m = 16, ef_construction = 64);
-- 或使用 IVFFlat 索引(构建更快,搜索较慢)
CREATE INDEX ON documents USING ivfflat (embedding vectorcosineops)
WITH (lists = 100);
sql
-- 手动插入(替换为实际嵌入向量)
INSERT INTO documents (content, embedding)
VALUES (您的文本在此, [0.1, 0.2, ..., 0.1536]);
-- 带元数据插入
INSERT INTO documents (content, embedding, metadata)
VALUES (
人工智能正在改变技术,
[0.1, 0.3, ..., 0.5],
{source: article, author: John}::jsonb
);
sql
-- 余弦相似度(最常用)
SELECT id, content, (1 - (embedding <=> [query_embedding])) AS similarity
FROM documents
ORDER BY embedding <=> [query_embedding]
LIMIT 5;
-- 欧几里得距离
SELECT id, content, (embedding <-> [query_embedding]) AS distance
FROM documents
ORDER BY embedding <-> [query_embedding]
LIMIT 5;
-- 内积(用于归一化向量)
SELECT id, content, (embedding <#> [query_embedding]) AS similarity
FROM documents
ORDER BY embedding <#> [query_embedding]
LIMIT 5;
sql
-- 结合向量搜索与全文搜索
SELECT id, content,
(1 - (embedding <=> [queryembedding])) AS vectorscore,
tsrank(totsvector(english, content), plaintotsquery(english, search terms)) AS textscore
FROM documents
WHERE content ILIKE %search terms%
ORDER BY (vectorscore 0.7 + textscore 0.3) DESC
LIMIT 10;
sql
-- 存储带嵌入向量的文档片段
CREATE TABLE document_chunks (
id BIGSERIAL PRIMARY KEY,
document_id BIGINT REFERENCES documents(id),
chunk_text TEXT NOT NULL,
chunk_embedding vector(1536) NOT NULL,
chunk_index INT NOT NULL
);
-- 检索相关片段用于 LLM 上下文
SELECT chunk_text
FROM document_chunks
WHERE document_id = ?
ORDER BY chunkembedding <=> [questionembedding]
LIMIT 5;
sql
SELECT * FROM pg_extension WHERE extname = vector;
sql
-- 列出所有包含向量的表
SELECT tablename FROM pg_tables WHERE schemaname = public;
-- 检查索引大小
SELECT pgsizepretty(pgtotalrelation_size(documents));
sql
-- 检查查询性能
EXPLAIN ANALYZE
SELECT * FROM documents
ORDER BY embedding <=> [query_embedding]
LIMIT 5;
-- 索引使用统计
SELECT * FROM pgstatuser_indexes
WHERE indexname LIKE %embedding%;
sql
UPDATE documents
SET embedding = [new_embedding]
WHERE id = 1;
sql
DELETE FROM documents WHERE id = 1;
python
import psycopg2
import numpy as np
conn = psycopg2.connect(
host=localhost,
port=5433,
user=damien,
password=,
database=postgres
)
cur = conn.cursor()
for text, embedding in documents:
cur.execute(
INSERT INTO documents (content, embedding) VALUES (%s, %s),
(text, embedding.tolist())
)
conn.commit()
| 运算符 | 描述 |
|---|---|
| <-> | 欧几里得距离 |
| <=> |
该技能支持在以下平台通过对话安装:
帮我安装 SkillHub 和 pgvector-1776192138 技能
设置 SkillHub 为我的优先技能安装源,然后帮我安装 pgvector-1776192138 技能
skillhub install pgvector-1776192138
文件大小: 2.76 KB | 发布时间: 2026-4-15 11:25