Text-to-SQL: Query Databases with Natural Language Using AI
Let AI translate business questions into SQL queries
Text-to-SQL: Natural Language Database Queries
The Problem
Most business users can't write SQL but still need insights from their data. Text-to-SQL bridges this gap.

Simple Text-to-SQL
python
import openai


def text_to_sql(question: str, schema: str) -> str:
    """Translate a natural-language question into a SQL query with an LLM.

    Args:
        question: The question to answer, in plain language.
        schema: Text description of the available tables and columns; it is
            embedded verbatim in the system prompt.

    Returns:
        The model's reply with surrounding whitespace removed and, if the
        reply was wrapped in a Markdown code fence, the fence lines removed.
        An empty string is returned when the reply has no text content.
    """
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": f"""You are a SQL expert. Convert natural language to SQL.
Database schema: {schema}
Return only the SQL query, no explanation."""
            },
            {"role": "user", "content": question}
        ]
    )
    # `content` is optional in the API response; treat a missing reply as empty
    # text instead of crashing on None.strip().
    sql = (response.choices[0].message.content or "").strip()

    # Despite the instruction, chat models frequently answer with
    # ```sql ... ```; the fence lines are not SQL and would break execution.
    if sql.startswith("```"):
        lines = sql.splitlines()[1:]  # drop the opening ``` / ```sql line
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]  # drop the closing fence
        sql = "\n".join(lines).strip()
    return sql
schema = """
Tables:
orders (id, customer_id, total, created_at, status)
customers (id, name, email, city)
"""

sql = text_to_sql("How many orders were placed last month?", schema)
# Example of a correct answer (PostgreSQL syntax). Both bounds are needed:
# without the upper bound the count would also include the current month.
#   SELECT COUNT(*) FROM orders
#   WHERE created_at >= DATE_TRUNC('month', NOW() - INTERVAL '1 month')
#     AND created_at <  DATE_TRUNC('month', NOW())
print(sql)
Schema-Aware RAG for Large Databases
python
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma


def build_schema_index(tables: dict) -> Chroma:
    """Index a database schema as one searchable text entry per table.

    Args:
        tables: Mapping of table name to an iterable of column-name strings.

    Returns:
        A Chroma store built from the per-table descriptions, embedded with
        OpenAIEmbeddings.
    """
    # One short description per table: its name on the first line and the
    # comma-separated column list on the second.
    table_descriptions = [
        f"Table: {name}\nColumns: {', '.join(column_names)}"
        for name, column_names in tables.items()
    ]
    return Chroma.from_texts(table_descriptions, OpenAIEmbeddings())
def smart_text_to_sql(question: str, schema_index: Chroma, k: int = 5) -> str:
    """Generate SQL using only the schema entries most relevant to the question.

    Args:
        question: The question to answer, in plain language.
        schema_index: Vector store whose documents describe the tables.
        k: Maximum number of schema entries to retrieve and pass to the model.
            Defaults to 5.

    Returns:
        The SQL text produced by text_to_sql for the focused schema.
    """
    # Retrieve only the table descriptions closest to the question so the
    # prompt stays small even when the database has many tables.
    relevant_docs = schema_index.similarity_search(question, k=k)
    relevant_schema = "\n".join(doc.page_content for doc in relevant_docs)

    # Generate SQL against the focused schema.
    return text_to_sql(question, relevant_schema)
Query Validation and Safety
python
import sqlparse
import re

# Whole-word, case-insensitive match. A plain substring test would reject safe
# queries that merely mention columns such as created_at or updated_at,
# because those names contain CREATE and UPDATE.
_WRITE_KEYWORDS = re.compile(
    r"\b(?:DROP|DELETE|UPDATE|INSERT|ALTER|CREATE|TRUNCATE)\b",
    re.IGNORECASE,
)


def validate_sql(sql: str, readonly: bool = True) -> bool:
    """Screen a generated SQL string before it is executed.

    Args:
        sql: The SQL text to check.
        readonly: When True (the default), reject any query that contains a
            data- or schema-modifying keyword as a whole word.

    Returns:
        True if the query passed the checks, False otherwise.

    Note:
        This is a keyword screen, not a security boundary. It still rejects a
        write keyword that appears inside a string literal, and sqlparse does
        not validate syntax. Run generated queries with a database role that
        only has read permissions as well.
    """
    # Nothing to run: an empty or whitespace-only string is never valid.
    if not sql or not sql.strip():
        return False

    if readonly and _WRITE_KEYWORDS.search(sql):
        return False

    # sqlparse is a non-validating parser; this only confirms that the text
    # splits into at least one statement.
    parsed = sqlparse.parse(sql)
    return len(parsed) > 0
def safe_execute(sql: str, db_conn) -> dict:
    """Execute *sql* on *db_conn* only if it passes validate_sql.

    Args:
        sql: The SQL text to run.
        db_conn: Connection object exposing ``execute(sql)`` whose result
            exposes ``fetchall()``.

    Returns:
        ``{"data": rows}`` on success, or ``{"error": message}`` when the
        query is rejected by validation or execution raises.
    """
    if validate_sql(sql):
        try:
            cursor = db_conn.execute(sql)
            rows = cursor.fetchall()
        except Exception as exc:
            # Failures are reported as data rather than raised so the caller
            # can inspect the message.
            return {"error": str(exc)}
        return {"data": rows}
    return {"error": "Invalid or unsafe SQL query"}
Iterative Refinement
python
def iterative_sql(question: str, schema: str, db_conn, max_retries: int = 3) -> dict:
    """Generate and run SQL, feeding failures back to the model for a retry.

    Args:
        question: The question to answer, in plain language.
        schema: Text description of the database schema.
        db_conn: Connection object passed through to safe_execute.
        max_retries: Maximum number of generate-and-execute attempts.

    Returns:
        The safe_execute result of the first attempt that has no "error" key,
        or ``{"error": "Could not generate valid SQL after retries"}`` when
        every attempt failed.
    """
    # Keep the caller's question intact; feedback is appended to a working copy.
    prompt = question
    for _ in range(max_retries):
        sql = text_to_sql(prompt, schema)
        result = safe_execute(sql, db_conn)
        if "error" not in result:
            return result
        # Ask the LLM to fix the error. The failed query is included because
        # each text_to_sql call is stateless, so the model cannot otherwise
        # see what it wrote.
        prompt = (
            f"{prompt}\n\nPrevious SQL: {sql}\n"
            f"Previous SQL had error: {result['error']}\nPlease fix."
        )
    return {"error": "Could not generate valid SQL after retries"}
Also available in 中文.