Fix OpenAI Moderation API integration: parse `categories` as booleans, compute scores from `category_scores`, add harassment categories, and switch to the omni-moderation-latest model
This commit is contained in:
parent
ec5a0aad8b
commit
66fe4bd60e
|
|
@ -42,26 +42,30 @@ type ThreePoisonsScore struct {
|
||||||
type OpenAIModerationResponse struct {
|
type OpenAIModerationResponse struct {
|
||||||
Results []struct {
|
Results []struct {
|
||||||
Categories struct {
|
Categories struct {
|
||||||
Hate float64 `json:"hate"`
|
Hate bool `json:"hate"`
|
||||||
HateThreatening float64 `json:"hate/threatening"`
|
HateThreatening bool `json:"hate/threatening"`
|
||||||
SelfHarm float64 `json:"self-harm"`
|
Harassment bool `json:"harassment"`
|
||||||
SelfHarmIntent float64 `json:"self-harm/intent"`
|
HarassmentThreatening bool `json:"harassment/threatening"`
|
||||||
SelfHarmInstructions float64 `json:"self-harm/instructions"`
|
SelfHarm bool `json:"self-harm"`
|
||||||
Sexual float64 `json:"sexual"`
|
SelfHarmIntent bool `json:"self-harm/intent"`
|
||||||
SexualMinors float64 `json:"sexual/minors"`
|
SelfHarmInstructions bool `json:"self-harm/instructions"`
|
||||||
Violence float64 `json:"violence"`
|
Sexual bool `json:"sexual"`
|
||||||
ViolenceGraphic float64 `json:"violence/graphic"`
|
SexualMinors bool `json:"sexual/minors"`
|
||||||
|
Violence bool `json:"violence"`
|
||||||
|
ViolenceGraphic bool `json:"violence/graphic"`
|
||||||
} `json:"categories"`
|
} `json:"categories"`
|
||||||
CategoryScores struct {
|
CategoryScores struct {
|
||||||
Hate float64 `json:"hate"`
|
Hate float64 `json:"hate"`
|
||||||
HateThreatening float64 `json:"hate/threatening"`
|
HateThreatening float64 `json:"hate/threatening"`
|
||||||
SelfHarm float64 `json:"self-harm"`
|
Harassment float64 `json:"harassment"`
|
||||||
SelfHarmIntent float64 `json:"self-harm/intent"`
|
HarassmentThreatening float64 `json:"harassment/threatening"`
|
||||||
SelfHarmInstructions float64 `json:"self-harm/instructions"`
|
SelfHarm float64 `json:"self-harm"`
|
||||||
Sexual float64 `json:"sexual"`
|
SelfHarmIntent float64 `json:"self-harm/intent"`
|
||||||
SexualMinors float64 `json:"sexual/minors"`
|
SelfHarmInstructions float64 `json:"self-harm/instructions"`
|
||||||
Violence float64 `json:"violence"`
|
Sexual float64 `json:"sexual"`
|
||||||
ViolenceGraphic float64 `json:"violence/graphic"`
|
SexualMinors float64 `json:"sexual/minors"`
|
||||||
|
Violence float64 `json:"violence"`
|
||||||
|
ViolenceGraphic float64 `json:"violence/graphic"`
|
||||||
} `json:"category_scores"`
|
} `json:"category_scores"`
|
||||||
Flagged bool `json:"flagged"`
|
Flagged bool `json:"flagged"`
|
||||||
} `json:"results"`
|
} `json:"results"`
|
||||||
|
|
@ -150,7 +154,7 @@ func (s *ModerationService) analyzeWithOpenAI(ctx context.Context, content strin
|
||||||
|
|
||||||
requestBody := map[string]interface{}{
|
requestBody := map[string]interface{}{
|
||||||
"input": content,
|
"input": content,
|
||||||
"model": "text-moderation-latest",
|
"model": "omni-moderation-latest",
|
||||||
}
|
}
|
||||||
|
|
||||||
jsonBody, err := json.Marshal(requestBody)
|
jsonBody, err := json.Marshal(requestBody)
|
||||||
|
|
@ -187,22 +191,28 @@ func (s *ModerationService) analyzeWithOpenAI(ctx context.Context, content strin
|
||||||
}
|
}
|
||||||
|
|
||||||
result := moderationResp.Results[0]
|
result := moderationResp.Results[0]
|
||||||
|
scores := result.CategoryScores
|
||||||
score := &ThreePoisonsScore{
|
score := &ThreePoisonsScore{
|
||||||
// Map OpenAI categories to Three Poisons
|
// Map OpenAI category scores to Three Poisons
|
||||||
Hate: max(
|
Hate: max(
|
||||||
result.Categories.Hate,
|
scores.Hate,
|
||||||
result.Categories.HateThreatening,
|
scores.HateThreatening,
|
||||||
result.Categories.Violence,
|
scores.Harassment,
|
||||||
result.Categories.ViolenceGraphic,
|
scores.HarassmentThreatening,
|
||||||
|
scores.Violence,
|
||||||
|
scores.ViolenceGraphic,
|
||||||
|
scores.Sexual,
|
||||||
|
scores.SexualMinors,
|
||||||
),
|
),
|
||||||
Greed: 0, // OpenAI doesn't detect greed/spam well
|
Greed: 0, // OpenAI doesn't detect greed/spam — handled by keyword fallback
|
||||||
Delusion: max(
|
Delusion: max(
|
||||||
result.Categories.SelfHarm,
|
scores.SelfHarm,
|
||||||
result.Categories.SelfHarmIntent,
|
scores.SelfHarmIntent,
|
||||||
result.Categories.SelfHarmInstructions,
|
scores.SelfHarmInstructions,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt.Printf("OpenAI moderation: flagged=%v hate=%.3f greed=%.3f delusion=%.3f\n", result.Flagged, score.Hate, score.Greed, score.Delusion)
|
||||||
return score, nil
|
return score, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue