Deploying AI Computer Vision in Production: From Training to Edge
Building scalable vision AI systems for real-world applications
Deploying AI Computer Vision in Production: From Training to Edge
Building scalable vision AI systems for real-world applications
A practical guide to building and deploying computer vision systems at production scale—covering object detection, image classification, video analytics, and edge deployment strategies.
Deploying AI Computer Vision in Production: From Training to Edge
Computer Vision Production Challenges
Research computer vision is about achieving state-of-the-art accuracy on benchmark datasets. Production computer vision is about building systems that work reliably on real-world data, scale to millions of images, and meet strict latency requirements.
Key production challenges:
Modern Computer Vision Architecture Choices
Foundation Models vs. Custom Training
python
# Option 1: Fine-tune a foundation model (recommended starting point)
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
import torch

# Start with a pre-trained vision transformer.
# `your_num_classes` is a placeholder: set it to the number of labels in
# your own dataset before running this snippet.
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=your_num_classes,
    # Needed because the requested num_labels may not match the size of the
    # classification head stored in the checkpoint.
    ignore_mismatched_sizes=True,
)

# Option 2: CLIP for zero-shot classification
from transformers import CLIPProcessor, CLIPModel

clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
def zero_shot_classify(image, class_names: list[str]) -> dict:
    """Score one image against a list of candidate labels using CLIP.

    Relies on the module-level ``processor`` and ``clip_model`` objects.

    Args:
        image: The image to classify, in whatever form ``processor`` accepts
            for its ``images`` argument.
        class_names: Candidate label strings to compare the image against.

    Returns:
        A dict mapping each entry of ``class_names`` to its softmax
        probability for the first (only) image in the batch. An empty
        ``class_names`` yields an empty dict without running the model.
    """
    # Nothing to compare against: skip the model call entirely.
    if not class_names:
        return {}

    import torch  # local import so this function does not rely on earlier snippets

    inputs = processor(
        text=class_names,
        images=image,
        return_tensors="pt",
        padding=True,
    )
    # Inference only, so there is no need to record an autograd graph.
    with torch.no_grad():
        outputs = clip_model(**inputs)
    # Softmax across the label axis turns image-text logits into probabilities.
    probs = outputs.logits_per_image.softmax(dim=1)
    return dict(zip(class_names, probs[0].tolist()))
# No training required for new categories!
# `product_image` is a placeholder for an image you have already loaded.
result = zero_shot_classify(
    product_image,
    ["electronics", "clothing", "food", "furniture"],
)
Object Detection for Production
python
# YOLO v8 - best balance of speed and accuracy for production
from ultralytics import YOLO

# Training
model = YOLO('yolov8n.pt')  # Start from pretrained
results = model.train(
    data='dataset.yaml',
    epochs=100,
    imgsz=640,
    batch=16,
    device='0',  # GPU
)

# Inference with batch processing
model = YOLO('best.pt')

# Process batch of images efficiently.
# The list must contain only real image sources; add your own paths here.
results = model(
    ['image1.jpg', 'image2.jpg'],
    batch=32,
    conf=0.5,
    iou=0.45,
)

# Export for production
# `simplify` is the ONNX graph-simplification switch; `optimize` targets
# TorchScript mobile export and is not an ONNX option.
model.export(format='onnx', simplify=True)  # ONNX for portability
model.export(format='tflite')  # TensorFlow Lite for mobile
model.export(format='engine')  # TensorRT for NVIDIA
Building a Production Vision Pipeline
High-Throughput Image Processing
python
import asyncio
import aiohttp
from PIL import Image
import io

import torch


class ProductionVisionPipeline:
    """Queue-fed, batched inference pipeline.

    Producers put dicts with an ``'image'`` entry and a ``'future'`` entry on
    ``self.queue``; ``worker`` groups them into batches, runs the model, and
    resolves each future with its result (or with the exception that made the
    batch fail).

    NOTE(review): ``load_optimized_model``, ``self.preprocess`` and
    ``self.postprocess`` are not defined in this snippet and must be supplied
    elsewhere.
    """

    def __init__(self, model_path: str, batch_size: int = 32):
        """Load the model and create the bounded request queue.

        Args:
            model_path: Passed to ``load_optimized_model``.
            batch_size: Maximum number of queued items grouped into one batch.
        """
        self.model = load_optimized_model(model_path)
        self.batch_size = batch_size
        self.queue = asyncio.Queue(maxsize=1000)

    async def process_batch(self, images: list) -> list:
        """GPU-efficient batch processing.

        Preprocesses every image, stacks them into one tensor on the GPU, runs
        the model under autocast without gradients, and returns the
        postprocessed outputs.
        """
        preprocessed = [self.preprocess(img) for img in images]
        batch_tensor = torch.stack(preprocessed).cuda()
        with torch.cuda.amp.autocast():  # Mixed precision for 2x speedup
            with torch.no_grad():
                outputs = self.model(batch_tensor)
        return self.postprocess(outputs)

    async def worker(self):
        """Continuously process batches from queue.

        Runs until cancelled. A failing batch no longer kills the worker: the
        exception is delivered to every future in that batch and the loop
        carries on with the next one.
        """
        while True:
            batch = []
            # Collect up to batch_size items; stop early once the queue has
            # been idle for 0.1 s so a partial batch is not held back.
            try:
                for _ in range(self.batch_size):
                    item = await asyncio.wait_for(
                        self.queue.get(), timeout=0.1
                    )
                    batch.append(item)
            except asyncio.TimeoutError:
                pass

            if not batch:
                continue

            try:
                results = await self.process_batch([b['image'] for b in batch])
            except Exception as exc:
                # Hand the failure to the callers instead of leaving their
                # futures pending forever.
                for item in batch:
                    if not item['future'].done():
                        item['future'].set_exception(exc)
                continue

            for item, result in zip(batch, results):
                # A caller may have cancelled its future while the batch ran;
                # set_result on a finished future would raise.
                if not item['future'].done():
                    item['future'].set_result(result)
Achieves 500+ images/second on A100 GPU
Edge Deployment
Optimizing for Mobile and Edge Devices
python
# Step 1: Quantize for edge deployment
import torch
from torch.quantization import quantize_dynamic

# PTQ (Post-Training Quantization) - no retraining
# Dynamic quantization only replaces layer types it has a dynamic kernel for
# (nn.Linear here). nn.Conv2d is not one of them, so listing it is a no-op;
# convolution-heavy models need static quantization to shrink those layers.
quantized = quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
# 4x smaller, 2-3x faster, < 1% accuracy loss
# (article's figures; they can only apply to the layers actually quantized)

# Step 2: Export to TFLite (Android/iOS)
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]  # FP16 for GPU acceleration
tflite_model = converter.convert()

# Step 3: ONNX for cross-platform
torch.onnx.export(
    model,
    dummy_input,
    "model.onnx",
    opset_version=13,
    # dynamic_axes refers to tensors by name, so the input must be named.
    input_names=['input'],
    dynamic_axes={'input': {0: 'batch_size'}},
)
On-Device Inference on iPhone
swift
// Core ML model inference on iOS
import CoreML
import Vision
import UIKit

/// Runs a Core ML image classifier through the Vision framework.
class VisionClassifier {
    /// Failures `classify(image:)` reports in addition to errors thrown by Vision.
    enum ClassificationError: Error {
        /// `image.cgImage` was `nil`, so there is nothing to hand to Vision.
        case missingCGImage
        /// The request finished but its results were not `[VNClassificationObservation]`.
        case unexpectedResults
    }

    private let model: VNCoreMLModel

    /// Loads `YourModel` and wraps it for use with Vision.
    /// - Throws: Any error raised while creating the Core ML model or the `VNCoreMLModel`.
    init() throws {
        let config = MLModelConfiguration()
        config.computeUnits = .all  // Use Neural Engine when available
        let coreMLModel = try YourModel(configuration: config)
        self.model = try VNCoreMLModel(for: coreMLModel.model)
    }

    /// Classifies `image` and returns the observations produced by the model.
    ///
    /// Every path either returns or throws; the previous continuation-based
    /// version never resumed when the request failed, so callers hung.
    ///
    /// - Parameter image: The image to classify; it must be backed by a `CGImage`.
    /// - Returns: The classification observations reported by the request.
    /// - Throws: `ClassificationError.missingCGImage` if the image has no `CGImage`,
    ///   `ClassificationError.unexpectedResults` if the request produced no
    ///   classification observations, or the error thrown by `perform(_:)`.
    func classify(image: UIImage) async throws -> [VNClassificationObservation] {
        guard let cgImage = image.cgImage else {
            throw ClassificationError.missingCGImage
        }

        let request = VNCoreMLRequest(model: model)
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])

        // `perform` runs the request synchronously, so the results can be read
        // straight off the request afterwards; no continuation is needed.
        try handler.perform([request])

        guard let results = request.results as? [VNClassificationObservation] else {
            throw ClassificationError.unexpectedResults
        }
        return results
    }
}
// 30ms inference on iPhone 15 Pro Neural Engine
Production Monitoring and Quality Control
Data Drift Detection
python
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset


def monitor_vision_data_quality(reference_images, production_images):
    """Detect when production images differ significantly from training data.

    Both image sets are turned into feature tables with
    ``extract_image_features`` and compared with an Evidently
    ``DataDriftPreset`` report. ``trigger_retraining_alert`` is called when
    the report flags dataset-level drift.

    NOTE(review): ``extract_image_features`` and ``trigger_retraining_alert``
    are not defined in this snippet and must be supplied elsewhere.

    Args:
        reference_images: Images representing the training distribution.
        production_images: Images recently seen in production.

    Returns:
        The report's ``dataset_drift`` value, so callers can act on it too.
    """
    # Extract image statistics as features
    ref_features = extract_image_features(reference_images)
    prod_features = extract_image_features(production_images)

    # Evidently drift report
    report = Report(metrics=[DataDriftPreset()])
    report.run(
        reference_data=ref_features,
        current_data=prod_features,
    )

    # Alert if drift detected
    drift_detected = report.as_dict()['metrics'][0]['result']['dataset_drift']
    if drift_detected:
        trigger_retraining_alert()
    return drift_detected
Vision AI Platforms
Key Takeaways
相关工具
相关教程
Modern approaches to personalization that drive conversion and retention
Practical machine learning approaches for accurate business forecasting
A practical guide to deploying natural language processing at enterprise scale