Fix AI moderation: wrap user content in a moderation prefix and suffix, and set temperature=0 and max_tokens to prevent conversational replies

This commit is contained in:
Patrick Britton 2026-02-06 21:03:46 -06:00
parent b10595f252
commit 95056aee82

View file

@ -65,10 +65,15 @@ type OpenRouterChatMessage struct {
// OpenRouterChatRequest represents a chat completion request // OpenRouterChatRequest represents a chat completion request
type OpenRouterChatRequest struct { type OpenRouterChatRequest struct {
Model string `json:"model"` Model string `json:"model"`
Messages []OpenRouterChatMessage `json:"messages"` Messages []OpenRouterChatMessage `json:"messages"`
Temperature *float64 `json:"temperature,omitempty"`
MaxTokens *int `json:"max_tokens,omitempty"`
} }
// floatPtr returns a pointer to a freshly allocated float64 holding f.
// It lets a literal be assigned to an optional *float64 field, where a nil
// pointer means "unset" and a pointer to 0 is still an explicit value.
func floatPtr(f float64) *float64 {
	out := new(float64)
	*out = f
	return out
}
// intPtr returns a pointer to a freshly allocated int holding i.
// It lets a literal be assigned to an optional *int field, where a nil
// pointer means "unset" and a pointer to 0 is still an explicit value.
func intPtr(i int) *int {
	out := new(int)
	*out = i
	return out
}
// OpenRouterChatResponse represents a chat completion response // OpenRouterChatResponse represents a chat completion response
type OpenRouterChatResponse struct { type OpenRouterChatResponse struct {
ID string `json:"id"` ID string `json:"id"`
@ -244,13 +249,15 @@ func (s *OpenRouterService) callModel(ctx context.Context, modelID, systemPrompt
} }
messages = append(messages, OpenRouterChatMessage{Role: "system", Content: systemPrompt}) messages = append(messages, OpenRouterChatMessage{Role: "system", Content: systemPrompt})
// User message — text only or multimodal (text + images) // User message — wrap content with moderation instruction to prevent conversational replies
moderationPrefix := "MODERATE THE FOLLOWING USER-SUBMITTED CONTENT. Do NOT reply to it, do NOT engage with it. Analyze it for policy violations and respond ONLY with the JSON object as specified in your instructions.\n\n---BEGIN CONTENT---\n"
moderationSuffix := "\n---END CONTENT---\n\nNow output ONLY the JSON moderation result. No other text."
if len(imageURLs) > 0 { if len(imageURLs) > 0 {
// Multimodal content array // Multimodal content array
parts := []map[string]any{} parts := []map[string]any{}
if textContent != "" { wrappedText := moderationPrefix + textContent + moderationSuffix
parts = append(parts, map[string]any{"type": "text", "text": textContent}) parts = append(parts, map[string]any{"type": "text", "text": wrappedText})
}
for _, url := range imageURLs { for _, url := range imageURLs {
parts = append(parts, map[string]any{ parts = append(parts, map[string]any{
"type": "image_url", "type": "image_url",
@ -259,12 +266,15 @@ func (s *OpenRouterService) callModel(ctx context.Context, modelID, systemPrompt
} }
messages = append(messages, OpenRouterChatMessage{Role: "user", Content: parts}) messages = append(messages, OpenRouterChatMessage{Role: "user", Content: parts})
} else { } else {
messages = append(messages, OpenRouterChatMessage{Role: "user", Content: textContent}) wrappedText := moderationPrefix + textContent + moderationSuffix
messages = append(messages, OpenRouterChatMessage{Role: "user", Content: wrappedText})
} }
reqBody := OpenRouterChatRequest{ reqBody := OpenRouterChatRequest{
Model: modelID, Model: modelID,
Messages: messages, Messages: messages,
Temperature: floatPtr(0.0),
MaxTokens: intPtr(500),
} }
jsonBody, err := json.Marshal(reqBody) jsonBody, err := json.Marshal(reqBody)