Fix AI moderation: wrap content with moderation prefix, add temperature=0 and max_tokens to prevent conversational replies

This commit is contained in:
Patrick Britton 2026-02-06 21:03:46 -06:00
parent b10595f252
commit 95056aee82

View file

@ -65,10 +65,15 @@ type OpenRouterChatMessage struct {
// OpenRouterChatRequest represents a chat completion request.
// It is the value that gets JSON-encoded as the request body; the struct tags
// give the key each field is written under.
//
// NOTE(review): Model and Messages are listed twice below. This looks like the
// removed and added lines of a diff shown together — confirm against the real
// file, where each field should be declared once.
type OpenRouterChatRequest struct {
Model string `json:"model"`
Messages []OpenRouterChatMessage `json:"messages"`
// Model is written under the "model" key.
Model string `json:"model"`
// Messages is the ordered list of chat messages, written under "messages".
Messages []OpenRouterChatMessage `json:"messages"`
// Temperature is optional. It is a pointer so that nil drops the key
// (omitempty) while an explicit 0 is still encoded.
Temperature *float64 `json:"temperature,omitempty"`
// MaxTokens is optional and written under "max_tokens"; nil drops the key.
MaxTokens *int `json:"max_tokens,omitempty"`
}
// floatPtr returns a pointer to a fresh float64 holding f.
// It allows optional pointer fields such as Temperature to be set from a
// literal, including 0, which a plain omitempty float64 would drop.
func floatPtr(f float64) *float64 {
	p := new(float64)
	*p = f
	return p
}
// intPtr returns a pointer to a fresh int holding i.
// It allows optional pointer fields such as MaxTokens to be set from a literal.
func intPtr(i int) *int {
	p := new(int)
	*p = i
	return p
}
// OpenRouterChatResponse represents a chat completion response
type OpenRouterChatResponse struct {
ID string `json:"id"`
@ -244,13 +249,15 @@ func (s *OpenRouterService) callModel(ctx context.Context, modelID, systemPrompt
}
messages = append(messages, OpenRouterChatMessage{Role: "system", Content: systemPrompt})
// User message — text only or multimodal (text + images)
// User message — wrap content with moderation instruction to prevent conversational replies
moderationPrefix := "MODERATE THE FOLLOWING USER-SUBMITTED CONTENT. Do NOT reply to it, do NOT engage with it. Analyze it for policy violations and respond ONLY with the JSON object as specified in your instructions.\n\n---BEGIN CONTENT---\n"
moderationSuffix := "\n---END CONTENT---\n\nNow output ONLY the JSON moderation result. No other text."
if len(imageURLs) > 0 {
// Multimodal content array
parts := []map[string]any{}
if textContent != "" {
parts = append(parts, map[string]any{"type": "text", "text": textContent})
}
wrappedText := moderationPrefix + textContent + moderationSuffix
parts = append(parts, map[string]any{"type": "text", "text": wrappedText})
for _, url := range imageURLs {
parts = append(parts, map[string]any{
"type": "image_url",
@ -259,12 +266,15 @@ func (s *OpenRouterService) callModel(ctx context.Context, modelID, systemPrompt
}
messages = append(messages, OpenRouterChatMessage{Role: "user", Content: parts})
} else {
messages = append(messages, OpenRouterChatMessage{Role: "user", Content: textContent})
wrappedText := moderationPrefix + textContent + moderationSuffix
messages = append(messages, OpenRouterChatMessage{Role: "user", Content: wrappedText})
}
reqBody := OpenRouterChatRequest{
Model: modelID,
Messages: messages,
Model: modelID,
Messages: messages,
Temperature: floatPtr(0.0),
MaxTokens: intPtr(500),
}
jsonBody, err := json.Marshal(reqBody)