# Vector Database Skill
Provides comprehensive vector database capabilities for the Golden Armada AI Agent Fleet Platform.
## When to Use This Skill
Activate this skill when working with:
- Semantic search implementation
- RAG (Retrieval Augmented Generation)
- Embedding storage and retrieval
- Similarity search
- Vector index management
## Embedding Generation
```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Single embedding
def get_openai_embedding(text: str) -> list[float]:
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    return response.data[0].embedding

# Batch embeddings (one API call for many texts)
def get_batch_embeddings(texts: list[str]) -> list[list[float]]:
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    return [item.embedding for item in response.data]
```
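For reference, `text-embedding-3-small` returns 1536-dimensional vectors by default, which is why the index definitions below use `dimension=1536` / `vector(1536)`:

```python
embedding = get_openai_embedding("How do I deploy an agent?")
print(len(embedding))  # 1536
```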
## Pinecone
```python
import os

from pinecone import Pinecone, ServerlessSpec

# Initialize client
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Create index (dimension must match the embedding model: 1536 here)
pc.create_index(
    name="agents",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-west-2"),
)

# Get an index handle
index = pc.Index("agents")

# Upsert vectors (`embedding` comes from the helpers above)
index.upsert(
    vectors=[
        {
            "id": "agent-1",
            "values": embedding,
            "metadata": {
                "name": "Claude Agent",
                "type": "claude",
                "description": "General assistant",
            },
        }
    ],
    namespace="production",
)

# Query with a metadata filter
results = index.query(
    vector=query_embedding,
    top_k=10,
    include_metadata=True,
    namespace="production",
    filter={"type": {"$eq": "claude"}},
)
for match in results.matches:
    print(f"{match.id}: {match.score} - {match.metadata}")

# Delete
index.delete(ids=["agent-1"], namespace="production")
```
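`create_index` typically fails if an index with that name already exists, so it is common to guard creation; a minimal sketch, assuming a recent Pinecone SDK:

```python
# Create the index only if it does not already exist
if "agents" not in pc.list_indexes().names():
    pc.create_index(
        name="agents",
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-west-2"),
    )
```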
## Chroma
```python
import chromadb

# Initialize a persistent client (data stored on disk)
client = chromadb.PersistentClient(path="./chroma_db")

# Create (or fetch) a collection using cosine distance
collection = client.get_or_create_collection(
    name="agents",
    metadata={"hnsw:space": "cosine"},
)

# Add documents
collection.add(
    ids=["agent-1", "agent-2"],
    embeddings=[embedding1, embedding2],
    documents=["Document 1 text", "Document 2 text"],
    metadatas=[
        {"type": "claude", "version": "3"},
        {"type": "gpt", "version": "4"},
    ],
)

# Query with a metadata filter
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=10,
    where={"type": "claude"},
    include=["documents", "metadatas", "distances"],
)

# Update metadata in place
collection.update(
    ids=["agent-1"],
    metadatas=[{"type": "claude", "version": "3.5"}],
)

# Delete
collection.delete(ids=["agent-1"])
```
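If you omit `embeddings`, Chroma generates them itself with the collection's embedding function (the default uses a local sentence-transformers model), which is convenient for prototyping:

```python
# Chroma embeds the documents itself when no embeddings are passed.
# Don't mix embedding models within one collection: distances are only
# meaningful within a single embedding space.
collection.add(
    ids=["agent-3"],
    documents=["Document 3 text"],
    metadatas=[{"type": "claude", "version": "3.7"}],
)
```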
## pgvector (PostgreSQL)
```sql
-- Enable extension
CREATE EXTENSION vector;

-- Create table
CREATE TABLE documents (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    content TEXT NOT NULL,
    embedding vector(1536),
    metadata JSONB DEFAULT '{}'
);

-- Create index (IVFFlat for larger datasets)
CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);

-- Or HNSW for better recall
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops);

-- Insert
INSERT INTO documents (content, embedding, metadata)
VALUES ('Document content', '[0.1, 0.2, ...]', '{"type": "manual"}');

-- Similarity search (<=> is cosine distance, so 1 - distance = similarity)
SELECT id, content, 1 - (embedding <=> $1) AS similarity
FROM documents
ORDER BY embedding <=> $1
LIMIT 10;

-- With a metadata filter
SELECT id, content, 1 - (embedding <=> $1) AS similarity
FROM documents
WHERE metadata->>'type' = 'manual'
ORDER BY embedding <=> $1
LIMIT 10;
```
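Both index types also expose query-time recall/speed knobs; the values below are illustrative starting points, not recommendations:

```sql
-- IVFFlat: probe more lists for better recall, at some cost in speed (default 1)
SET ivfflat.probes = 10;

-- HNSW: larger ef_search improves recall, at some cost in speed (default 40)
SET hnsw.ef_search = 100;
```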
### Python with pgvector
```python
import uuid

from pgvector.sqlalchemy import Vector
from sqlalchemy import Column, String, select
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Document(Base):
    __tablename__ = "documents"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    content = Column(String, nullable=False)
    embedding = Column(Vector(1536))
    # "metadata" is reserved on SQLAlchemy declarative classes, so map the
    # column under a different attribute name
    doc_metadata = Column("metadata", JSONB, default=dict)

# Insert
doc = Document(
    content="Document content",
    embedding=embedding,
    doc_metadata={"type": "manual"},
)
session.add(doc)
session.commit()

# Query: nearest neighbors by cosine distance (comparator method from pgvector)
results = session.execute(
    select(Document)
    .order_by(Document.embedding.cosine_distance(query_embedding))
    .limit(10)
).scalars().all()
```
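To mirror the filtered SQL query above, the JSONB comparator can be combined with the distance ordering; a sketch, assuming the `doc_metadata` mapping from the model above:

```python
# Metadata filter + similarity ranking in one query
results = session.execute(
    select(Document)
    .where(Document.doc_metadata["type"].astext == "manual")
    .order_by(Document.embedding.cosine_distance(query_embedding))
    .limit(10)
).scalars().all()
```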
## RAG Implementation
```python
class RAGService:
    def __init__(self, vector_store, llm_client):
        self.vector_store = vector_store
        self.llm = llm_client

    async def query(self, question: str, top_k: int = 5) -> str:
        # 1. Generate query embedding
        query_embedding = await self.get_embedding(question)

        # 2. Retrieve relevant documents
        docs = await self.vector_store.search(
            embedding=query_embedding,
            top_k=top_k,
        )

        # 3. Build context
        context = "\n\n".join(
            f"Document {i + 1}:\n{doc.content}"
            for i, doc in enumerate(docs)
        )

        # 4. Generate response
        prompt = f"""Answer the question based on the following context.

Context:
{context}

Question: {question}

Answer:"""
        return await self.llm.generate(prompt)

    async def add_document(self, content: str, metadata: dict | None = None):
        metadata = metadata or {}

        # Chunk document
        chunks = self.chunk_text(content)

        # Generate embeddings for all chunks in one batch
        embeddings = await self.get_batch_embeddings(chunks)

        # Store each chunk under a stable per-chunk ID
        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
            await self.vector_store.upsert(
                id=f"{metadata.get('doc_id', 'doc')}-{i}",
                embedding=embedding,
                content=chunk,
                metadata=metadata,
            )

    def chunk_text(self, text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]:
        # Fixed-size character chunks with overlap, so content near a chunk
        # boundary appears in two chunks
        chunks = []
        start = 0
        while start < len(text):
            chunks.append(text[start:start + chunk_size])
            start += chunk_size - overlap
        return chunks
```
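`RAGService` assumes `get_embedding` and `get_batch_embeddings` methods that are not defined above. One way to supply them is a small mixin over the async OpenAI client; a sketch with illustrative class and attribute names, not a fixed interface:

```python
from openai import AsyncOpenAI

class OpenAIEmbeddingMixin:
    """Supplies the embedding methods RAGService calls."""

    embed_model = "text-embedding-3-small"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.embed_client = AsyncOpenAI()  # reads OPENAI_API_KEY

    async def get_embedding(self, text: str) -> list[float]:
        response = await self.embed_client.embeddings.create(
            model=self.embed_model, input=text
        )
        return response.data[0].embedding

    async def get_batch_embeddings(self, texts: list[str]) -> list[list[float]]:
        response = await self.embed_client.embeddings.create(
            model=self.embed_model, input=texts
        )
        return [item.embedding for item in response.data]

# Compose: embedding methods resolve before RAGService in the MRO
class OpenAIRAGService(OpenAIEmbeddingMixin, RAGService):
    pass
```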
## Best Practices
- Choose an appropriate index type (HNSW for recall, IVFFlat for scale)
- Chunk documents appropriately (typically 500-1000 tokens)
- Include overlap between chunks (10-20%)
- Store metadata for filtering
- Use namespaces/collections to organize data
- Monitor query latency and index performance
- Batch operations for bulk inserts (see the sketch below)
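For the last point, a minimal batching sketch against the Pinecone index shown earlier (`batch_size=100` is a common starting point, not a hard limit):

```python
# Upsert in fixed-size batches instead of one call per vector
def batched_upsert(index, vectors: list[dict], namespace: str,
                   batch_size: int = 100) -> None:
    for i in range(0, len(vectors), batch_size):
        index.upsert(vectors=vectors[i:i + batch_size], namespace=namespace)
```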