Fix OpenAI Moderation API: correct response parsing, use omni-moderation-latest model
parent ec5a0aad8b
commit 66fe4bd60e
@@ -42,19 +42,23 @@ type ThreePoisonsScore struct {
 type OpenAIModerationResponse struct {
 	Results []struct {
 		Categories struct {
-			Hate                 float64 `json:"hate"`
-			HateThreatening      float64 `json:"hate/threatening"`
-			SelfHarm             float64 `json:"self-harm"`
-			SelfHarmIntent       float64 `json:"self-harm/intent"`
-			SelfHarmInstructions float64 `json:"self-harm/instructions"`
-			Sexual               float64 `json:"sexual"`
-			SexualMinors         float64 `json:"sexual/minors"`
-			Violence             float64 `json:"violence"`
-			ViolenceGraphic      float64 `json:"violence/graphic"`
+			Hate                  bool `json:"hate"`
+			HateThreatening       bool `json:"hate/threatening"`
+			Harassment            bool `json:"harassment"`
+			HarassmentThreatening bool `json:"harassment/threatening"`
+			SelfHarm              bool `json:"self-harm"`
+			SelfHarmIntent        bool `json:"self-harm/intent"`
+			SelfHarmInstructions  bool `json:"self-harm/instructions"`
+			Sexual                bool `json:"sexual"`
+			SexualMinors          bool `json:"sexual/minors"`
+			Violence              bool `json:"violence"`
+			ViolenceGraphic       bool `json:"violence/graphic"`
 		} `json:"categories"`
+		CategoryScores struct {
+			Hate                  float64 `json:"hate"`
+			HateThreatening       float64 `json:"hate/threatening"`
+			Harassment            float64 `json:"harassment"`
+			HarassmentThreatening float64 `json:"harassment/threatening"`
+			SelfHarm              float64 `json:"self-harm"`
+			SelfHarmIntent        float64 `json:"self-harm/intent"`
+			SelfHarmInstructions  float64 `json:"self-harm/instructions"`
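To make the parsing fix concrete, here is a minimal, self-contained decode sketch. The payload is invented and trimmed to a single category, and the `category_scores` JSON key on CategoryScores does not appear in the hunk shown; it is assumed from the shape of OpenAI moderation responses.

// Decode sketch (illustrative; not part of this commit). The payload is
// invented and trimmed to one category; real responses carry the full set.
package main

import (
	"encoding/json"
	"fmt"
)

type demoResponse struct {
	Results []struct {
		Flagged    bool `json:"flagged"`
		Categories struct {
			Hate bool `json:"hate"`
		} `json:"categories"`
		CategoryScores struct {
			Hate float64 `json:"hate"`
		} `json:"category_scores"` // key assumed; not visible in the hunk above
	} `json:"results"`
}

func main() {
	payload := []byte(`{"results":[{"flagged":true,` +
		`"categories":{"hate":true},"category_scores":{"hate":0.91}}]}`)

	var resp demoResponse
	if err := json.Unmarshal(payload, &resp); err != nil {
		panic(err)
	}
	r := resp.Results[0]
	// Categories says whether a category tripped; CategoryScores says how
	// strongly, which is what the Three Poisons mapping below needs.
	fmt.Printf("flagged=%v hate=%.2f\n", r.Flagged, r.CategoryScores.Hate)
}

The original bug was decoding the boolean categories object into float64 fields, which cannot populate them, so the old struct never carried usable scores; splitting Categories and CategoryScores matches the actual response.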
@@ -150,7 +154,7 @@ func (s *ModerationService) analyzeWithOpenAI(ctx context.Context, content strin
 	requestBody := map[string]interface{}{
 		"input": content,
-		"model": "text-moderation-latest",
+		"model": "omni-moderation-latest",
 	}
 
 	jsonBody, err := json.Marshal(requestBody)
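For orientation, a sketch of how such a request is typically sent, assuming the standard https://api.openai.com/v1/moderations endpoint and bearer-token auth; moderate and apiKey are illustrative names, not identifiers from this file.

// Illustrative request helper; a sketch, not this repository's code.
package moderation

import (
	"bytes"
	"context"
	"encoding/json"
	"net/http"
)

func moderate(ctx context.Context, apiKey, content string) (*http.Response, error) {
	body, err := json.Marshal(map[string]interface{}{
		"input": content,
		"model": "omni-moderation-latest",
	})
	if err != nil {
		return nil, err
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost,
		"https://api.openai.com/v1/moderations", bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+apiKey)
	return http.DefaultClient.Do(req) // caller closes resp.Body
}

omni-moderation-latest accepts the same string input as the older text-moderation models, so only the model field needs to change here.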
@@ -187,22 +191,28 @@ func (s *ModerationService) analyzeWithOpenAI(ctx context.Context, content strin
 	}
 
 	result := moderationResp.Results[0]
+	scores := result.CategoryScores
 	score := &ThreePoisonsScore{
-		// Map OpenAI categories to Three Poisons
+		// Map OpenAI category scores to Three Poisons
 		Hate: max(
-			result.Categories.Hate,
-			result.Categories.HateThreatening,
-			result.Categories.Violence,
-			result.Categories.ViolenceGraphic,
+			scores.Hate,
+			scores.HateThreatening,
+			scores.Harassment,
+			scores.HarassmentThreatening,
+			scores.Violence,
+			scores.ViolenceGraphic,
+			scores.Sexual,
+			scores.SexualMinors,
 		),
-		Greed: 0, // OpenAI doesn't detect greed/spam well
+		Greed: 0, // OpenAI doesn't detect greed/spam — handled by keyword fallback
 		Delusion: max(
-			result.Categories.SelfHarm,
-			result.Categories.SelfHarmIntent,
-			result.Categories.SelfHarmInstructions,
+			scores.SelfHarm,
+			scores.SelfHarmIntent,
+			scores.SelfHarmInstructions,
 		),
 	}
 
 	fmt.Printf("OpenAI moderation: flagged=%v hate=%.3f greed=%.3f delusion=%.3f\n", result.Flagged, score.Hate, score.Greed, score.Delusion)
 	return score, nil
 }
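One note on the variadic max calls: they rely on Go 1.21's built-in min/max. On an older toolchain the same mapping needs a small helper; a sketch, with maxOf as an invented name:

// maxOf mirrors the Go 1.21+ built-in max for float64: it returns the
// largest of one or more values. Only needed on pre-1.21 toolchains.
func maxOf(first float64, rest ...float64) float64 {
	m := first
	for _, v := range rest {
		if v > m {
			m = v
		}
	}
	return m
}

Taking the maximum rather than an average keeps the mapping conservative: a single strongly scored OpenAI category is enough to raise the corresponding poison score.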