From 66fe4bd60ef81691203ca54d90b3b23259f8246d Mon Sep 17 00:00:00 2001
From: Patrick Britton <admin@pbritton.dev>
Date: Fri, 6 Feb 2026 11:12:00 -0600
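Subject: [PATCH] Fix OpenAI Moderation API: correct response parsing, use
 omni-moderation-latest model

* Decode the `categories` object as booleans instead of float64; the
  numeric values are read from `category_scores`.
* Add the `harassment` and `harassment/threatening` fields to both
  `categories` and `category_scores`.
* Build ThreePoisonsScore from `category_scores` rather than `categories`.
  Hate is now the max of the hate, harassment, violence and sexual scores;
  Delusion remains the max of the self-harm scores; Greed stays 0.
* Request the `omni-moderation-latest` model instead of
  `text-moderation-latest`.
* Print the flagged state and the mapped scores after each moderation call.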
---
 .../internal/services/moderation_service.go   | 66 +++++++++++--------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/go-backend/internal/services/moderation_service.go b/go-backend/internal/services/moderation_service.go
index 788d1c7..5016251 100644
--- a/go-backend/internal/services/moderation_service.go
+++ b/go-backend/internal/services/moderation_service.go
@@ -42,26 +42,30 @@ type ThreePoisonsScore struct {
 type OpenAIModerationResponse struct {
 	Results []struct {
 		Categories struct {
-			Hate                 float64 `json:"hate"`
-			HateThreatening      float64 `json:"hate/threatening"`
-			SelfHarm             float64 `json:"self-harm"`
-			SelfHarmIntent       float64 `json:"self-harm/intent"`
-			SelfHarmInstructions float64 `json:"self-harm/instructions"`
-			Sexual               float64 `json:"sexual"`
-			SexualMinors         float64 `json:"sexual/minors"`
-			Violence             float64 `json:"violence"`
-			ViolenceGraphic      float64 `json:"violence/graphic"`
+			Hate                  bool `json:"hate"`
+			HateThreatening       bool `json:"hate/threatening"`
+			Harassment            bool `json:"harassment"`
+			HarassmentThreatening bool `json:"harassment/threatening"`
+			SelfHarm              bool `json:"self-harm"`
+			SelfHarmIntent        bool `json:"self-harm/intent"`
+			SelfHarmInstructions  bool `json:"self-harm/instructions"`
+			Sexual                bool `json:"sexual"`
+			SexualMinors          bool `json:"sexual/minors"`
+			Violence              bool `json:"violence"`
+			ViolenceGraphic       bool `json:"violence/graphic"`
 		} `json:"categories"`
 		CategoryScores struct {
-			Hate                 float64 `json:"hate"`
-			HateThreatening      float64 `json:"hate/threatening"`
-			SelfHarm             float64 `json:"self-harm"`
-			SelfHarmIntent       float64 `json:"self-harm/intent"`
-			SelfHarmInstructions float64 `json:"self-harm/instructions"`
-			Sexual               float64 `json:"sexual"`
-			SexualMinors         float64 `json:"sexual/minors"`
-			Violence             float64 `json:"violence"`
-			ViolenceGraphic      float64 `json:"violence/graphic"`
+			Hate                  float64 `json:"hate"`
+			HateThreatening       float64 `json:"hate/threatening"`
+			Harassment            float64 `json:"harassment"`
+			HarassmentThreatening float64 `json:"harassment/threatening"`
+			SelfHarm              float64 `json:"self-harm"`
+			SelfHarmIntent        float64 `json:"self-harm/intent"`
+			SelfHarmInstructions  float64 `json:"self-harm/instructions"`
+			Sexual                float64 `json:"sexual"`
+			SexualMinors          float64 `json:"sexual/minors"`
+			Violence              float64 `json:"violence"`
+			ViolenceGraphic       float64 `json:"violence/graphic"`
 		} `json:"category_scores"`
 		Flagged bool `json:"flagged"`
 	} `json:"results"`
@@ -150,7 +154,7 @@ func (s *ModerationService) analyzeWithOpenAI(ctx context.Context, content strin
 
 	requestBody := map[string]interface{}{
 		"input": content,
-		"model": "text-moderation-latest",
+		"model": "omni-moderation-latest",
 	}
 
 	jsonBody, err := json.Marshal(requestBody)
@@ -187,22 +191,28 @@ func (s *ModerationService) analyzeWithOpenAI(ctx context.Context, content strin
 	}
 
 	result := moderationResp.Results[0]
+	scores := result.CategoryScores
 	score := &ThreePoisonsScore{
-		// Map OpenAI categories to Three Poisons
+		// Map OpenAI category scores to Three Poisons
 		Hate: max(
-			result.Categories.Hate,
-			result.Categories.HateThreatening,
-			result.Categories.Violence,
-			result.Categories.ViolenceGraphic,
+			scores.Hate,
+			scores.HateThreatening,
+			scores.Harassment,
+			scores.HarassmentThreatening,
+			scores.Violence,
+			scores.ViolenceGraphic,
+			scores.Sexual,
+			scores.SexualMinors,
 		),
-		Greed: 0, // OpenAI doesn't detect greed/spam well
+		Greed: 0, // OpenAI doesn't detect greed/spam — handled by keyword fallback
 		Delusion: max(
-			result.Categories.SelfHarm,
-			result.Categories.SelfHarmIntent,
-			result.Categories.SelfHarmInstructions,
+			scores.SelfHarm,
+			scores.SelfHarmIntent,
+			scores.SelfHarmInstructions,
 		),
 	}
 
+	fmt.Printf("OpenAI moderation: flagged=%v hate=%.3f greed=%.3f delusion=%.3f\n", result.Flagged, score.Hate, score.Greed, score.Delusion)
 	return score, nil
 }