Supabase + OpenAI: Build a Semantic Search App in 30 Minutes 2026
Build a full-stack semantic search application using Supabase pgvector, OpenAI embeddings, and Next.js with authentication and real-time updates
Supabase + OpenAI: Build a Semantic Search App in 30 Minutes 2026
Build a full-stack semantic search application using Supabase pgvector, OpenAI embeddings, and Next.js with authentication and real-time updates
Tutorial for building a production semantic search application using Supabase's pgvector extension with OpenAI embeddings. Covers database setup, embedding generation, similarity search queries, and building a Next.js frontend with real-time search.
Supabase + OpenAI: Build a Semantic Search App in 30 Minutes 2026
Semantic search understands meaning, not just keywords. A user searching for "fast car" should find results about "high-performance vehicles" even without exact keyword matches. This tutorial builds a complete semantic search app using Supabase's built-in pgvector support.
Why Supabase for Semantic Search?
What We're Building
A documentation search tool that:
Step 1: Database Setup
sql
-- In Supabase SQL editor
-- Enable pgvector extension (supplies the VECTOR column type used below)
CREATE EXTENSION IF NOT EXISTS vector;
-- Create documents table: one row per indexed chunk of text
CREATE TABLE documents (
id BIGSERIAL PRIMARY KEY,
title TEXT NOT NULL,
content TEXT NOT NULL,
-- Nullable: the indexing script passes NULL when no source is given
source_url TEXT,
embedding VECTOR(1536), -- OpenAI text-embedding-3-small dimension
metadata JSONB DEFAULT '{}',
created_at TIMESTAMPTZ DEFAULT NOW(),
-- NOTE(review): no trigger is defined here, so updated_at keeps its insert-time
-- default unless the application sets it explicitly; confirm this is intended
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Create index for fast similarity search (cosine distance, matching the
-- <=> operator used by match_documents)
-- NOTE(review): pgvector's documentation advises building ivfflat indexes
-- after the table holds representative data; confirm whether this index
-- should be created (or rebuilt) after the initial load
CREATE INDEX documents_embedding_idx ON documents
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
-- Match function for semantic search.
-- Returns up to match_count rows from documents whose cosine similarity to
-- query_embedding is strictly greater than match_threshold, most similar first.
-- similarity is computed as 1 - cosine distance (the <=> operator).
CREATE OR REPLACE FUNCTION match_documents(
  query_embedding VECTOR(1536),
  match_threshold FLOAT DEFAULT 0.7,
  match_count INT DEFAULT 10
)
RETURNS TABLE (
  id BIGINT,
  title TEXT,
  content TEXT,
  source_url TEXT,
  similarity FLOAT
)
LANGUAGE sql
-- Read-only lookup: STABLE lets the planner treat repeated calls within one
-- statement as returning the same result.
STABLE
AS $$
  -- Columns are table-qualified because the RETURNS TABLE output columns
  -- (id, title, content, source_url) share their names with table columns.
  SELECT
    documents.id,
    documents.title,
    documents.content,
    documents.source_url,
    1 - (documents.embedding <=> query_embedding) AS similarity
  FROM documents
  WHERE 1 - (documents.embedding <=> query_embedding) > match_threshold
  -- Order by raw distance so the vector index on embedding can serve the sort.
  ORDER BY documents.embedding <=> query_embedding
  LIMIT match_count;
$$;
Step 2: Backend - Indexing Documents
python
import os
from pathlib import Path
from supabase import create_client
from openai import OpenAI
import tiktoken

# Module-level clients shared by the indexing and search helpers below.
# Credentials are read from the environment; a missing variable raises
# KeyError at import time.
openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
supabase = create_client(
    os.environ["SUPABASE_URL"],
    os.environ["SUPABASE_SERVICE_KEY"],
)
def chunk_text(text: str, max_tokens: int = 400, overlap: int = 50) -> list:
    """Split *text* into overlapping chunks of at most *max_tokens* tokens.

    The text is tokenized with the ``cl100k_base`` encoding, sliced into
    windows that each start ``max_tokens - overlap`` tokens after the
    previous one, and every window is decoded back to a string.

    Args:
        text: The text to split.
        max_tokens: Maximum number of tokens per chunk; must be positive.
        overlap: Number of tokens shared between consecutive chunks; must
            satisfy ``0 <= overlap < max_tokens``.

    Returns:
        A list of chunk strings, empty when *text* encodes to no tokens.

    Raises:
        ValueError: If *max_tokens* or *overlap* is out of range.
    """
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be positive, got {max_tokens}")
    if not 0 <= overlap < max_tokens:
        # A non-positive step would either crash range() or silently
        # produce no chunks at all.
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < max_tokens, "
            f"got overlap={overlap}, max_tokens={max_tokens}"
        )

    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    step = max_tokens - overlap

    chunks = []
    for start in range(0, len(tokens), step):
        chunks.append(enc.decode(tokens[start:start + max_tokens]))
        if start + max_tokens >= len(tokens):
            # This window already reached the end; another chunk would
            # contain only tokens that are part of this one.
            break
    return chunks
def generate_embedding(text: str) -> list:
    """Return the ``text-embedding-3-small`` embedding for *text*.

    Newlines in *text* are replaced by spaces before the request is sent.
    The embedding of the first (and only) input item is returned.
    """
    single_line = " ".join(text.split("\n"))
    api_response = openai_client.embeddings.create(
        model="text-embedding-3-small",
        input=single_line,
    )
    first_item = api_response.data[0]
    return first_item.embedding
def index_document(title: str, content: str, source_url: str = None):
    """Chunk *content*, embed every chunk, and insert the rows into ``documents``.

    When the content yields more than one chunk, each row's title gets a
    ``(part N)`` suffix (1-based); a single chunk keeps *title* unchanged.
    All rows are written with one insert call, a summary line is printed,
    and the insert result is returned.
    """
    pieces = chunk_text(content)
    is_multipart = len(pieces) > 1

    rows = [
        {
            "title": f"{title} (part {position})" if is_multipart else title,
            "content": piece,
            "source_url": source_url,
            "embedding": generate_embedding(piece),
        }
        for position, piece in enumerate(pieces, start=1)
    ]

    response = supabase.table("documents").insert(rows).execute()
    print(f"Indexed: {title} ({len(pieces)} chunks)")
    return response
def index_directory(directory: str):
    """Index every ``*.md`` file found recursively under *directory*.

    The document title is derived from the file stem (hyphens become
    spaces, then title-cased) and the source URL is the file's path
    relative to *directory*.
    """
    root = Path(directory)
    for md_file in root.rglob("*.md"):
        body = md_file.read_text(encoding="utf-8")
        heading = md_file.stem.replace("-", " ").title()
        relative_source = str(md_file.relative_to(directory))
        index_document(heading, body, source_url=relative_source)
# Index your docs
index_directory("./docs")
Step 3: Semantic Search Function
python
def semantic_search(query: str, limit: int = 10, threshold: float = 0.7) -> list:
    """Return documents semantically similar to *query*.

    Args:
        query: Free-text search query.
        limit: Maximum number of rows to return (``match_count``).
        threshold: Minimum similarity a row must exceed (``match_threshold``).

    Returns:
        The ``data`` payload of the ``match_documents`` RPC response.
    """
    # Generate query embedding
    query_embedding = generate_embedding(query)

    # Call Supabase RPC function
    result = supabase.rpc(
        "match_documents",
        {
            "query_embedding": query_embedding,
            "match_threshold": threshold,
            "match_count": limit,
        },
    ).execute()
    return result.data


# Test search
# Run one example query and print each hit's similarity, title and a
# 200-character preview of its content.
results = semantic_search("How do I configure authentication?")
for doc in results:
    print(f"[{doc['similarity']:.3f}] {doc['title']}")
    print(f" {doc['content'][:200]}...\n")
Step 4: Next.js Frontend
typescript
// src/app/search/page.tsx
'use client';import { useState } from 'react';
import { createClient } from '@/lib/supabase/client';
/** One row returned by the `match_documents` RPC via `/api/search`. */
interface SearchResult {
  id: number;
  title: string;
  content: string;
  // The documents.source_url column has no NOT NULL constraint and the
  // indexing script inserts null by default, so this can be null.
  source_url: string | null;
  // Cosine similarity to the query (1 - cosine distance).
  similarity: number;
}
/**
 * Search page: posts the query to `/api/search` and renders the matches
 * with their similarity score, a 300-character preview and an optional
 * link to the source.
 */
export default function SearchPage() {
  const [query, setQuery] = useState('');
  const [results, setResults] = useState<SearchResult[]>([]);
  const [isSearching, setIsSearching] = useState(false);

  const handleSearch = async () => {
    // Ignore empty or whitespace-only queries.
    if (!query.trim()) return;
    setIsSearching(true);
    try {
      const response = await fetch('/api/search', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query })
      });
      const data = await response.json();
      // On failure the API responds with `{ error }` and no `results`;
      // fall back to an empty list so `results.map` never sees undefined.
      setResults(response.ok && Array.isArray(data.results) ? data.results : []);
    } finally {
      setIsSearching(false);
    }
  };

  return (
    <div>
      <h1>Documentation Search</h1>
      <div className='flex'>
        <input
          type='text'
          value={query}
          onChange={(e) => setQuery(e.target.value)}
          onKeyDown={(e) => e.key === 'Enter' && handleSearch()}
          placeholder='Search by meaning, not just keywords...'
          className='flex-1 border rounded px-3 py-2'
        />
      </div>
      <div>
        {results.map((result) => (
          <div key={result.id}>
            <h3>{result.title}</h3>
            <span>{(result.similarity * 100).toFixed(0)}% match</span>
            <p>{result.content.substring(0, 300)}...</p>
            {result.source_url && (
              <a href={result.source_url}>View source</a>
            )}
          </div>
        ))}
      </div>
    </div>
  );
}
typescript
// src/app/api/search/route.ts
import { NextRequest, NextResponse } from 'next/server';
import { createClient } from '@/lib/supabase/server';
import OpenAI from 'openai';const openai = new OpenAI();
/**
 * POST /api/search
 *
 * Expects a JSON body `{ query: string }`. Embeds the query with
 * `text-embedding-3-small`, calls the `match_documents` RPC and responds
 * with `{ results }`. Responds 400 for a malformed body or a missing/empty
 * query, and 500 with `{ error }` when the RPC fails.
 */
export async function POST(req: NextRequest) {
  // The body comes from the client, so parse and validate it defensively.
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    return NextResponse.json(
      { error: 'Request body must be valid JSON' },
      { status: 400 }
    );
  }

  const query = (body as { query?: unknown } | null)?.query;
  if (typeof query !== 'string' || !query.trim()) {
    return NextResponse.json(
      { error: 'query must be a non-empty string' },
      { status: 400 }
    );
  }

  // Generate query embedding
  const embeddingResponse = await openai.embeddings.create({
    input: query.replace(/\n/g, ' '),
    model: 'text-embedding-3-small'
  });
  const embedding = embeddingResponse.data[0].embedding;

  // Search Supabase
  const supabase = createClient();
  const { data: results, error } = await supabase.rpc('match_documents', {
    query_embedding: embedding,
    match_threshold: 0.7,
    match_count: 10
  });

  if (error) {
    return NextResponse.json({ error: error.message }, { status: 500 });
  }
  return NextResponse.json({ results });
}
Adding AI-Powered Answers (RAG)
typescript
// Upgrade to RAG: answer questions using retrieved context
/**
 * RAG variant of the search route.
 *
 * Expects `{ query, mode }`. Always retrieves matching documents; when
 * `mode === 'answer'` it also asks `gpt-4o` to answer the question from the
 * retrieved content and responds with `{ answer, sources }`. Otherwise it
 * responds with `{ results }`.
 *
 * NOTE(review): `searchDocuments` is not defined in this snippet; it is
 * presumably the embedding + `match_documents` lookup from the search route.
 */
export async function POST(req: NextRequest) {
  const { query, mode } = await req.json();

  // Get relevant documents
  const results = await searchDocuments(query);

  if (mode === 'answer') {
    // Concatenate the retrieved chunks into a single context block.
    const context = results.map(r => r.content).join('\n\n');
    const answer = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        {
          role: 'system',
          content: 'Answer questions based on the provided documentation. Cite sources.'
        },
        {
          role: 'user',
          // Template literal: the backticks are required for interpolation.
          content: `Documentation:\n${context}\n\nQuestion: ${query}`
        }
      ]
    });
    return NextResponse.json({
      answer: answer.choices[0].message.content,
      sources: results
    });
  }

  return NextResponse.json({ results });
}
Performance and Scaling
sql
-- Switch to HNSW index for better performance at scale
-- IF EXISTS / IF NOT EXISTS make the migration safe to re-run.
DROP INDEX IF EXISTS documents_embedding_idx;
CREATE INDEX IF NOT EXISTS documents_embedding_hnsw_idx ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
Conclusion
Supabase with pgvector is the fastest path from zero to semantic search—no separate vector database, no extra infrastructure, and it lives right alongside your application data. The 30-minute build time is realistic: database setup (5 min), indexing pipeline (10 min), search API (10 min), basic UI (5 min).
相关工具
相关教程
Automatically classify, summarize, and draft replies to emails using AI
Build voice AI applications with natural-sounding TTS and custom voice cloning
Transcribe audio files, meetings, and real-time speech with Whisper