Supabase + OpenAI: Build a Semantic Search App in 30 Minutes 2026
Build a full-stack semantic search application using Supabase pgvector, OpenAI embeddings, and Next.js with authentication and real-time updates
Supabase + OpenAI: Build a Semantic Search App in 30 Minutes 2026
Semantic search understands meaning, not just keywords. A user searching for "fast car" should find results about "high-performance vehicles" even without exact keyword matches. This tutorial builds a complete semantic search app using Supabase's built-in pgvector support.
Why Supabase for Semantic Search?
What We're Building
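A documentation search tool that chunks Markdown files, embeds each chunk with OpenAI, stores the vectors in Supabase (pgvector), and serves meaning-based search — with optional AI-generated answers — through a Next.js page.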
Step 1: Database Setup
sql
-- In Supabase SQL editor
-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;
-- Create documents table: one row per indexed chunk of text
CREATE TABLE documents (
  id          BIGSERIAL PRIMARY KEY,
  title       TEXT NOT NULL,
  content     TEXT NOT NULL,
  source_url  TEXT,
  embedding   VECTOR(1536), -- OpenAI text-embedding-3-small dimension
  metadata    JSONB DEFAULT '{}',
  created_at  TIMESTAMPTZ DEFAULT NOW(),
  updated_at  TIMESTAMPTZ DEFAULT NOW()
);
-- Create index for fast similarity search
-- NOTE(review): pgvector's documentation recommends building IVFFlat indexes
-- after the table already contains data; this one is created on an empty
-- table, so consider re-creating it once Step 2 has loaded rows — confirm.
CREATE INDEX documents_embedding_idx ON documents
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
-- Match function for semantic search
-- Returns at most match_count rows from documents whose similarity
-- (1 - (embedding <=> query_embedding)) is greater than match_threshold,
-- ordered by ascending distance to query_embedding.
CREATE OR REPLACE FUNCTION match_documents(
  query_embedding VECTOR(1536),
  match_threshold FLOAT DEFAULT 0.7,
  match_count INT DEFAULT 10
)
RETURNS TABLE (
  id BIGINT,
  title TEXT,
  content TEXT,
  source_url TEXT,
  similarity FLOAT
)
LANGUAGE sql
STABLE -- read-only: the body is a single SELECT
AS $$
  -- Columns are table-qualified because the RETURNS TABLE output columns
  -- (id, title, content, source_url) share their names.
  SELECT
    documents.id,
    documents.title,
    documents.content,
    documents.source_url,
    1 - (documents.embedding <=> query_embedding) AS similarity
  FROM documents
  WHERE 1 - (documents.embedding <=> query_embedding) > match_threshold
  ORDER BY documents.embedding <=> query_embedding
  LIMIT match_count;
$$;
Step 2: Backend - Indexing Documents
python
import os
from pathlib import Path
from supabase import create_client
from openai import OpenAI
import tiktoken

# Shared clients, created once at import time. All credentials are read from
# the environment; a missing variable raises KeyError here.
openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
supabase = create_client(
    os.environ["SUPABASE_URL"],
    os.environ["SUPABASE_SERVICE_KEY"],
)
def chunk_text(text: str, max_tokens: int = 400, overlap: int = 50) -> list:
    """Split *text* into overlapping chunks of at most *max_tokens* tokens.

    The text is tokenized with the ``cl100k_base`` encoding, cut into windows
    of ``max_tokens`` tokens that start every ``max_tokens - overlap`` tokens,
    and each window is decoded back to a string.

    Args:
        text: The text to split.
        max_tokens: Maximum number of tokens per chunk.
        overlap: Number of tokens shared between consecutive chunks.

    Returns:
        A list of chunk strings; empty when *text* encodes to no tokens.

    Raises:
        ValueError: If ``overlap`` is negative or not smaller than
            ``max_tokens`` (the window would never advance).
    """
    # Validate before touching the tokenizer so bad arguments fail fast with
    # a clear message instead of an opaque range() error or an empty result.
    if not 0 <= overlap < max_tokens:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < max_tokens "
            f"(got overlap={overlap}, max_tokens={max_tokens})"
        )

    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    step = max_tokens - overlap

    chunks = []
    for start in range(0, len(tokens), step):
        chunks.append(enc.decode(tokens[start:start + max_tokens]))
        # Once a window reaches the end of the text, any further window would
        # lie entirely inside this one, so stop instead of emitting a
        # redundant chunk.
        if start + max_tokens >= len(tokens):
            break
    return chunks
def generate_embedding(text: str) -> list:
    """Request an embedding for *text* from ``text-embedding-3-small``.

    Newlines in *text* are replaced with spaces before the request is sent.
    Returns the ``embedding`` of the first item in the response.
    """
    single_line = text.replace("\n", " ")
    response = openai_client.embeddings.create(
        model="text-embedding-3-small",
        input=single_line,
    )
    first_item = response.data[0]
    return first_item.embedding
def index_document(title: str, content: str, source_url: str = None):
    """Chunk *content*, embed each chunk, and insert the rows into ``documents``.

    When the content yields more than one chunk, each row's title gets a
    ``(part N)`` suffix; a single chunk keeps the plain *title*. Prints a
    summary line and returns the result of the executed insert.
    """
    chunks = chunk_text(content)
    is_multipart = len(chunks) > 1
    rows = [
        {
            "title": f"{title} (part {i+1})" if is_multipart else title,
            "content": piece,
            "source_url": source_url,
            "embedding": generate_embedding(piece),
        }
        for i, piece in enumerate(chunks)
    ]
    insert_result = supabase.table("documents").insert(rows).execute()
    print(f"Indexed: {title} ({len(chunks)} chunks)")
    return insert_result
def index_directory(directory: str):
    """Index every ``*.md`` file found recursively under *directory*.

    The title is derived from the file stem (hyphens become spaces, then
    title-cased) and the path relative to *directory* is stored as the
    source URL.
    """
    root = Path(directory)
    for md_file in root.rglob("*.md"):
        body = md_file.read_text(encoding="utf-8")
        heading = md_file.stem.replace("-", " ").title()
        relative_source = str(md_file.relative_to(directory))
        index_document(heading, body, source_url=relative_source)
# Index your docs
index_directory("./docs")
Step 3: Semantic Search Function
python
def semantic_search(query: str, limit: int = 10, threshold: float = 0.7) -> list:
    """Embed *query* and return matching rows from the ``match_documents`` RPC.

    Args:
        query: Natural-language search text.
        limit: Passed to the RPC as ``match_count``.
        threshold: Passed to the RPC as ``match_threshold``.

    Returns:
        The ``data`` attribute of the executed RPC result.
    """
    # Generate query embedding
    query_embedding = generate_embedding(query)
    # Call Supabase RPC function
    result = supabase.rpc(
        "match_documents",
        {
            "query_embedding": query_embedding,
            "match_threshold": threshold,
            "match_count": limit,
        },
    ).execute()
    return result.data


# Test search
results = semantic_search("How do I configure authentication?")
for doc in results:
    print(f"[{doc['similarity']:.3f}] {doc['title']}")
    print(f" {doc['content'][:200]}...\n")
Step 4: Next.js Frontend
typescript
// src/app/search/page.tsx
'use client';

import { useState } from 'react';
import { createClient } from '@/lib/supabase/client';

/** One search hit as returned by POST /api/search. */
interface SearchResult {
  id: number;
  title: string;
  content: string;
  source_url: string;
  similarity: number;
}

/**
 * Search page: posts the query to /api/search and lists the returned
 * documents with their similarity score.
 */
export default function SearchPage() {
  const [query, setQuery] = useState('');
  const [results, setResults] = useState<SearchResult[]>([]);
  const [isSearching, setIsSearching] = useState(false);

  const handleSearch = async () => {
    if (!query.trim()) return;
    setIsSearching(true);
    try {
      const response = await fetch('/api/search', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query })
      });
      const data = await response.json();
      // On failure the API responds with `{ error }` and no `results`;
      // fall back to an empty list so `results.map` below never runs on
      // undefined.
      setResults(response.ok && Array.isArray(data.results) ? data.results : []);
    } finally {
      setIsSearching(false);
    }
  };

  return (
    <div>
      <h1>Documentation Search</h1>
      <div className='flex'>
        <input
          type='text'
          value={query}
          onChange={(e) => setQuery(e.target.value)}
          onKeyDown={(e) => e.key === 'Enter' && handleSearch()}
          placeholder='Search by meaning, not just keywords...'
          className='flex-1 border rounded px-3 py-2'
        />
        <button onClick={handleSearch} disabled={isSearching}>
          {isSearching ? 'Searching...' : 'Search'}
        </button>
      </div>
      {results.map((result) => (
        <div key={result.id}>
          <h2>{result.title}</h2>
          <span>{(result.similarity * 100).toFixed(0)}% match</span>
          <p>{result.content.substring(0, 300)}...</p>
          {result.source_url && (
            <a href={result.source_url}>View source</a>
          )}
        </div>
      ))}
    </div>
  );
}
typescript
// src/app/api/search/route.ts
import { NextRequest, NextResponse } from 'next/server';
import { createClient } from '@/lib/supabase/server';
import OpenAI from 'openai';

const openai = new OpenAI();

/**
 * POST /api/search
 * Body: `{ query: string }`.
 * Responds with `{ results }` from the `match_documents` RPC, `{ error }`
 * with status 400 for a missing/invalid query, or `{ error }` with status
 * 500 when the RPC fails.
 */
export async function POST(req: NextRequest) {
  // A malformed JSON body or a non-string query would otherwise throw
  // (e.g. on `query.replace`) and surface as an unhandled 500.
  const body = await req.json().catch(() => null);
  const query = body?.query;
  if (typeof query !== 'string' || !query.trim()) {
    return NextResponse.json(
      { error: 'query must be a non-empty string' },
      { status: 400 }
    );
  }

  // Generate query embedding
  const embeddingResponse = await openai.embeddings.create({
    input: query.replace(/\n/g, ' '),
    model: 'text-embedding-3-small'
  });
  const embedding = embeddingResponse.data[0].embedding;

  // Search Supabase
  const supabase = createClient();
  const { data: results, error } = await supabase.rpc('match_documents', {
    query_embedding: embedding,
    match_threshold: 0.7,
    match_count: 10
  });
  if (error) {
    return NextResponse.json({ error: error.message }, { status: 500 });
  }
  return NextResponse.json({ results });
}
Adding AI-Powered Answers (RAG)
typescript
// Upgrade to RAG: answer questions using retrieved context
// NOTE(review): `searchDocuments` is not defined in this snippet; presumably
// it wraps the embedding + `match_documents` lookup from the search route —
// confirm before use.
export async function POST(req: NextRequest) {
  const { query, mode } = await req.json();

  // Get relevant documents
  const results = await searchDocuments(query);

  if (mode === 'answer') {
    // Concatenate the retrieved chunks into a single context string.
    const context = results.map(r => r.content).join('\n\n');
    const answer = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        {
          role: 'system',
          content: 'Answer questions based on the provided documentation. Cite sources.'
        },
        {
          role: 'user',
          // Template literal: the backticks are required for interpolation.
          content: `Documentation:\n${context}\n\nQuestion: ${query}`
        }
      ]
    });
    return NextResponse.json({
      answer: answer.choices[0].message.content,
      sources: results
    });
  }

  return NextResponse.json({ results });
}
Performance and Scaling
sql
-- Switch to HNSW index for better performance at scale
-- IF EXISTS / IF NOT EXISTS make the script safe to re-run.
DROP INDEX IF EXISTS documents_embedding_idx;
CREATE INDEX IF NOT EXISTS documents_embedding_hnsw_idx ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
Conclusion
Supabase with pgvector is the fastest path from zero to semantic search—no separate vector database, no extra infrastructure, and it lives right alongside your application data. The 30-minute build time is realistic: database setup (5 min), indexing pipeline (10 min), search API (10 min), basic UI (5 min).
Also available in 中文.