Add image downloading and base64 conversion for OpenRouter vision models
This commit is contained in:
parent
afe34159b5
commit
e3a3157f5a
|
|
@ -3,6 +3,7 @@ package services
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
|
@ -85,6 +86,48 @@ type OpenRouterChatRequest struct {
|
||||||
func floatPtr(f float64) *float64 { return &f }
|
func floatPtr(f float64) *float64 { return &f }
|
||||||
func intPtr(i int) *int { return &i }
|
func intPtr(i int) *int { return &i }
|
||||||
|
|
||||||
|
// downloadImage downloads an image from URL and returns base64 encoded data
|
||||||
|
func (s *OpenRouterService) downloadImage(ctx context.Context, url string) (string, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to create image request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := s.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to download image: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return "", fmt.Errorf("image download failed: %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Limit image size to 5MB
|
||||||
|
const maxImageSize = 5 * 1024 * 1024
|
||||||
|
limitedReader := io.LimitReader(resp.Body, maxImageSize)
|
||||||
|
|
||||||
|
imageData, err := io.ReadAll(limitedReader)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to read image data: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect content type
|
||||||
|
contentType := resp.Header.Get("Content-Type")
|
||||||
|
if contentType == "" {
|
||||||
|
contentType = http.DetectContentType(imageData)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only allow image formats
|
||||||
|
if !strings.HasPrefix(contentType, "image/") {
|
||||||
|
return "", fmt.Errorf("unsupported content type: %s", contentType)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to base64
|
||||||
|
base64Data := base64.StdEncoding.EncodeToString(imageData)
|
||||||
|
return fmt.Sprintf("data:%s;base64,%s", contentType, base64Data), nil
|
||||||
|
}
|
||||||
|
|
||||||
// OpenRouterChatResponse represents a chat completion response
|
// OpenRouterChatResponse represents a chat completion response
|
||||||
type OpenRouterChatResponse struct {
|
type OpenRouterChatResponse struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
|
|
@ -334,15 +377,27 @@ func (s *OpenRouterService) callModel(ctx context.Context, modelID, systemPrompt
|
||||||
moderationSuffix := "\n---END CONTENT---\n\nNow output ONLY the JSON moderation result. No other text."
|
moderationSuffix := "\n---END CONTENT---\n\nNow output ONLY the JSON moderation result. No other text."
|
||||||
|
|
||||||
if len(imageURLs) > 0 {
|
if len(imageURLs) > 0 {
|
||||||
// Multimodal content array
|
// Multimodal content array with downloaded images
|
||||||
parts := []map[string]any{}
|
parts := []map[string]any{}
|
||||||
wrappedText := moderationPrefix + textContent + moderationSuffix
|
wrappedText := moderationPrefix + textContent + moderationSuffix
|
||||||
parts = append(parts, map[string]any{"type": "text", "text": wrappedText})
|
parts = append(parts, map[string]any{"type": "text", "text": wrappedText})
|
||||||
|
|
||||||
for _, url := range imageURLs {
|
for _, url := range imageURLs {
|
||||||
parts = append(parts, map[string]any{
|
// Download image and convert to base64
|
||||||
"type": "image_url",
|
base64Image, err := s.downloadImage(ctx, url)
|
||||||
"image_url": map[string]string{"url": url},
|
if err != nil {
|
||||||
})
|
// If download fails, fall back to URL (some models might support it)
|
||||||
|
parts = append(parts, map[string]any{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": map[string]string{"url": url},
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
// Use base64 data
|
||||||
|
parts = append(parts, map[string]any{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": map[string]string{"url": base64Image},
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
messages = append(messages, OpenRouterChatMessage{Role: "user", Content: parts})
|
messages = append(messages, OpenRouterChatMessage{Role: "user", Content: parts})
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue