Add image downloading and base64 conversion for OpenRouter vision models

This commit is contained in:
Patrick Britton 2026-02-16 09:13:55 -06:00
parent afe34159b5
commit e3a3157f5a

View file

@ -3,6 +3,7 @@ package services
import ( import (
"bytes" "bytes"
"context" "context"
"encoding/base64"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
@ -85,6 +86,48 @@ type OpenRouterChatRequest struct {
// floatPtr returns a pointer to a copy of f. Each call yields a distinct
// pointer, so the result can be stored without aliasing the caller's value.
func floatPtr(f float64) *float64 { return &f }
// intPtr returns a pointer to a copy of i. Each call yields a distinct
// pointer, so the result can be stored without aliasing the caller's value.
func intPtr(i int) *int { return &i }
// downloadImage fetches the image at url with the service's HTTP client and
// returns it as a data URL of the form "data:<media type>;base64,<payload>".
//
// The request is bound to ctx. An error is returned when:
//   - the request cannot be built or the download fails,
//   - the response status is not 200 OK,
//   - the body is empty or larger than 5MB (oversized bodies are rejected
//     rather than truncated, since a truncated image is corrupt), or
//   - the media type is not image/*.
//
// The media type comes from the Content-Type header with any parameters
// stripped and the case normalised; when the header is absent it is sniffed
// from the downloaded bytes.
func (s *OpenRouterService) downloadImage(ctx context.Context, url string) (string, error) {
	// Upper bound on the raw (pre-base64) image size.
	const maxImageSize = 5 * 1024 * 1024

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", fmt.Errorf("failed to create image request: %w", err)
	}

	resp, err := s.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("failed to download image: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("image download failed: %d", resp.StatusCode)
	}

	// Cheap early exit when the server announces an oversized body.
	if resp.ContentLength > maxImageSize {
		return "", fmt.Errorf("image too large: %d bytes exceeds %d byte limit", resp.ContentLength, maxImageSize)
	}

	// Read one byte past the limit so an oversized body can be told apart
	// from one that is exactly at the limit; LimitReader alone would silently
	// hand back a truncated image.
	imageData, err := io.ReadAll(io.LimitReader(resp.Body, maxImageSize+1))
	if err != nil {
		return "", fmt.Errorf("failed to read image data: %w", err)
	}
	if len(imageData) > maxImageSize {
		return "", fmt.Errorf("image too large: exceeds %d byte limit", maxImageSize)
	}
	if len(imageData) == 0 {
		return "", fmt.Errorf("image download returned an empty body")
	}

	// Keep only the bare media type: parameters such as "; charset=binary"
	// do not belong in the data URL, and media types are case-insensitive.
	contentType, _, _ := strings.Cut(resp.Header.Get("Content-Type"), ";")
	contentType = strings.ToLower(strings.TrimSpace(contentType))
	if contentType == "" {
		// No declared type: sniff it from the payload.
		contentType, _, _ = strings.Cut(http.DetectContentType(imageData), ";")
	}

	// Only allow image formats.
	if !strings.HasPrefix(contentType, "image/") {
		return "", fmt.Errorf("unsupported content type: %s", contentType)
	}

	return "data:" + contentType + ";base64," + base64.StdEncoding.EncodeToString(imageData), nil
}
// OpenRouterChatResponse represents a chat completion response // OpenRouterChatResponse represents a chat completion response
type OpenRouterChatResponse struct { type OpenRouterChatResponse struct {
ID string `json:"id"` ID string `json:"id"`
@ -334,15 +377,27 @@ func (s *OpenRouterService) callModel(ctx context.Context, modelID, systemPrompt
moderationSuffix := "\n---END CONTENT---\n\nNow output ONLY the JSON moderation result. No other text." moderationSuffix := "\n---END CONTENT---\n\nNow output ONLY the JSON moderation result. No other text."
if len(imageURLs) > 0 { if len(imageURLs) > 0 {
// Multimodal content array // Multimodal content array with downloaded images
parts := []map[string]any{} parts := []map[string]any{}
wrappedText := moderationPrefix + textContent + moderationSuffix wrappedText := moderationPrefix + textContent + moderationSuffix
parts = append(parts, map[string]any{"type": "text", "text": wrappedText}) parts = append(parts, map[string]any{"type": "text", "text": wrappedText})
for _, url := range imageURLs { for _, url := range imageURLs {
// Download image and convert to base64
base64Image, err := s.downloadImage(ctx, url)
if err != nil {
// If download fails, fall back to URL (some models might support it)
parts = append(parts, map[string]any{ parts = append(parts, map[string]any{
"type": "image_url", "type": "image_url",
"image_url": map[string]string{"url": url}, "image_url": map[string]string{"url": url},
}) })
} else {
// Use base64 data
parts = append(parts, map[string]any{
"type": "image_url",
"image_url": map[string]string{"url": base64Image},
})
}
} }
messages = append(messages, OpenRouterChatMessage{Role: "user", Content: parts}) messages = append(messages, OpenRouterChatMessage{Role: "user", Content: parts})
} else { } else {