feat: full NSFW system - Cinemax rules, auto-reclassify with warning, not-allowed removal with appeal email, blur toggle setting, user self-labeling
This commit is contained in:
parent
68dd8d3544
commit
27b48128fe
|
|
@ -2,6 +2,7 @@ package handlers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
@ -209,6 +210,8 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
|
||||||
BeaconLat *float64 `json:"beacon_lat"`
|
BeaconLat *float64 `json:"beacon_lat"`
|
||||||
BeaconLong *float64 `json:"beacon_long"`
|
BeaconLong *float64 `json:"beacon_long"`
|
||||||
TTLHours *int `json:"ttl_hours"`
|
TTLHours *int `json:"ttl_hours"`
|
||||||
|
IsNSFW bool `json:"is_nsfw"`
|
||||||
|
NSFWReason string `json:"nsfw_reason"`
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := c.ShouldBindJSON(&req); err != nil {
|
if err := c.ShouldBindJSON(&req); err != nil {
|
||||||
|
|
@ -293,6 +296,8 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
|
||||||
AllowChain: allowChain,
|
AllowChain: allowChain,
|
||||||
Visibility: "public",
|
Visibility: "public",
|
||||||
ExpiresAt: expiresAt,
|
ExpiresAt: expiresAt,
|
||||||
|
IsNSFW: req.IsNSFW,
|
||||||
|
NSFWReason: req.NSFWReason,
|
||||||
Lat: req.BeaconLat,
|
Lat: req.BeaconLat,
|
||||||
Long: req.BeaconLong,
|
Long: req.BeaconLong,
|
||||||
}
|
}
|
||||||
|
|
@ -338,18 +343,24 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5b. OpenRouter AI Moderation — NSFW vs Flag decision
|
// 5b. OpenRouter AI Moderation — NSFW vs Flag decision
|
||||||
|
userSelfLabeledNSFW := req.IsNSFW
|
||||||
|
orDecision := ""
|
||||||
if h.openRouterService != nil {
|
if h.openRouterService != nil {
|
||||||
orResult, orErr := h.openRouterService.ModerateText(c.Request.Context(), req.Body)
|
orResult, orErr := h.openRouterService.ModerateText(c.Request.Context(), req.Body)
|
||||||
if orErr == nil && orResult != nil {
|
if orErr == nil && orResult != nil {
|
||||||
|
orDecision = orResult.Action
|
||||||
switch orResult.Action {
|
switch orResult.Action {
|
||||||
case "nsfw":
|
case "nsfw":
|
||||||
post.IsNSFW = true
|
post.IsNSFW = true
|
||||||
|
if orResult.NSFWReason != "" {
|
||||||
post.NSFWReason = orResult.NSFWReason
|
post.NSFWReason = orResult.NSFWReason
|
||||||
|
}
|
||||||
if post.Status != "pending_moderation" {
|
if post.Status != "pending_moderation" {
|
||||||
post.Status = "active" // NSFW posts are active but blurred
|
post.Status = "active" // NSFW posts are active but blurred
|
||||||
}
|
}
|
||||||
case "flag":
|
case "flag":
|
||||||
post.Status = "pending_moderation"
|
// NOT ALLOWED — will be removed after creation
|
||||||
|
post.Status = "removed"
|
||||||
}
|
}
|
||||||
// Update CIS from OpenRouter scores if available
|
// Update CIS from OpenRouter scores if available
|
||||||
if orResult.Hate > 0 || orResult.Greed > 0 || orResult.Delusion > 0 {
|
if orResult.Hate > 0 || orResult.Greed > 0 || orResult.Delusion > 0 {
|
||||||
|
|
@ -367,8 +378,7 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle Flags - Comprehensive Content Flagging
|
// Handle Flags - Comprehensive Content Flagging
|
||||||
if h.moderationService != nil && post.Status == "pending_moderation" {
|
if h.moderationService != nil && (post.Status == "pending_moderation" || post.Status == "removed") {
|
||||||
// Extract all media URLs for flagging
|
|
||||||
mediaURLs := []string{}
|
mediaURLs := []string{}
|
||||||
if req.ImageURL != nil && *req.ImageURL != "" {
|
if req.ImageURL != nil && *req.ImageURL != "" {
|
||||||
mediaURLs = append(mediaURLs, *req.ImageURL)
|
mediaURLs = append(mediaURLs, *req.ImageURL)
|
||||||
|
|
@ -384,6 +394,64 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
|
||||||
_ = h.moderationService.FlagPost(c.Request.Context(), post.ID, scores, reason)
|
_ = h.moderationService.FlagPost(c.Request.Context(), post.ID, scores, reason)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NSFW auto-reclassify: AI says NSFW but user didn't self-label → send warning
|
||||||
|
if post.IsNSFW && !userSelfLabeledNSFW && h.notificationService != nil {
|
||||||
|
go func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
h.notificationService.NotifyNSFWWarning(ctx, userID.String(), post.ID.String())
|
||||||
|
log.Info().Str("post_id", post.ID.String()).Str("author_id", userID.String()).Msg("NSFW warning sent — post auto-labeled")
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOT ALLOWED: AI flagged → post removed, create violation, send appeal notification + email
|
||||||
|
if post.Status == "removed" && orDecision == "flag" {
|
||||||
|
go func() {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// Send in-app notification
|
||||||
|
if h.notificationService != nil {
|
||||||
|
h.notificationService.NotifyContentRemoved(ctx, userID.String(), post.ID.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create moderation violation record
|
||||||
|
if h.moderationService != nil {
|
||||||
|
h.moderationService.FlagPost(ctx, post.ID, &services.ThreePoisonsScore{Hate: 1.0}, "not_allowed")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send appeal email — get email from users table, display name from profiles
|
||||||
|
var userEmail string
|
||||||
|
h.postRepo.Pool().QueryRow(ctx, `SELECT email FROM users WHERE id = $1`, userID).Scan(&userEmail)
|
||||||
|
profile, _ := h.userRepo.GetProfileByID(ctx, userID.String())
|
||||||
|
if userEmail != "" {
|
||||||
|
displayName := "there"
|
||||||
|
if profile != nil && profile.DisplayName != nil {
|
||||||
|
displayName = *profile.DisplayName
|
||||||
|
}
|
||||||
|
snippet := req.Body
|
||||||
|
if len(snippet) > 100 {
|
||||||
|
snippet = snippet[:100] + "..."
|
||||||
|
}
|
||||||
|
appealBody := fmt.Sprintf(
|
||||||
|
"Hi %s,\n\n"+
|
||||||
|
"Your recent post on Sojorn was removed because it was found to violate our community guidelines.\n\n"+
|
||||||
|
"Post content: \"%s\"\n\n"+
|
||||||
|
"If you believe this was a mistake, you can appeal this decision in your Sojorn app:\n"+
|
||||||
|
"Go to Profile → Settings → Appeals\n\n"+
|
||||||
|
"Our moderation team will review your appeal within 48 hours.\n\n"+
|
||||||
|
"— The Sojorn Team",
|
||||||
|
displayName, snippet,
|
||||||
|
)
|
||||||
|
log.Info().Str("email", userEmail).Msg("Sending content removal appeal email")
|
||||||
|
h.postRepo.Pool().Exec(ctx,
|
||||||
|
`INSERT INTO email_queue (to_email, subject, body, created_at) VALUES ($1, $2, $3, NOW()) ON CONFLICT DO NOTHING`,
|
||||||
|
userEmail, "Your Sojorn post was removed", appealBody,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Warn().Str("post_id", post.ID.String()).Str("author_id", userID.String()).Msg("Post removed by AI moderation — not allowed content")
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
// Log AI moderation decision to audit log
|
// Log AI moderation decision to audit log
|
||||||
if h.moderationService != nil {
|
if h.moderationService != nil {
|
||||||
decision := "pass"
|
decision := "pass"
|
||||||
|
|
@ -391,7 +459,9 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
|
||||||
if post.ToneLabel != nil && *post.ToneLabel != "" {
|
if post.ToneLabel != nil && *post.ToneLabel != "" {
|
||||||
flagReason = *post.ToneLabel
|
flagReason = *post.ToneLabel
|
||||||
}
|
}
|
||||||
if post.Status == "pending_moderation" {
|
if post.Status == "removed" {
|
||||||
|
decision = "flag"
|
||||||
|
} else if post.Status == "pending_moderation" {
|
||||||
decision = "flag"
|
decision = "flag"
|
||||||
} else if post.IsNSFW {
|
} else if post.IsNSFW {
|
||||||
decision = "nsfw"
|
decision = "nsfw"
|
||||||
|
|
@ -403,7 +473,7 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
|
||||||
} else {
|
} else {
|
||||||
scores = &services.ThreePoisonsScore{}
|
scores = &services.ThreePoisonsScore{}
|
||||||
}
|
}
|
||||||
h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, "", nil)
|
h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, orDecision, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for @mentions and notify mentioned users
|
// Check for @mentions and notify mentioned users
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,8 @@ const (
|
||||||
NotificationTypeBeaconReport = "beacon_report"
|
NotificationTypeBeaconReport = "beacon_report"
|
||||||
NotificationTypeShare = "share"
|
NotificationTypeShare = "share"
|
||||||
NotificationTypeQuipReaction = "quip_reaction"
|
NotificationTypeQuipReaction = "quip_reaction"
|
||||||
|
NotificationTypeNSFWWarning = "nsfw_warning"
|
||||||
|
NotificationTypeContentRemoved = "content_removed"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NotificationPriority constants
|
// NotificationPriority constants
|
||||||
|
|
|
||||||
|
|
@ -31,5 +31,6 @@ type UserSettings struct {
|
||||||
DataSaverMode *bool `json:"data_saver_mode" db:"data_saver_mode"`
|
DataSaverMode *bool `json:"data_saver_mode" db:"data_saver_mode"`
|
||||||
DefaultPostTtl *int `json:"default_post_ttl" db:"default_post_ttl"`
|
DefaultPostTtl *int `json:"default_post_ttl" db:"default_post_ttl"`
|
||||||
NSFWEnabled *bool `json:"nsfw_enabled" db:"nsfw_enabled"`
|
NSFWEnabled *bool `json:"nsfw_enabled" db:"nsfw_enabled"`
|
||||||
|
NSFWBlurEnabled *bool `json:"nsfw_blur_enabled" db:"nsfw_blur_enabled"`
|
||||||
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,10 @@ func NewPostRepository(pool *pgxpool.Pool) *PostRepository {
|
||||||
return &PostRepository{pool: pool}
|
return &PostRepository{pool: pool}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *PostRepository) Pool() *pgxpool.Pool {
|
||||||
|
return r.pool
|
||||||
|
}
|
||||||
|
|
||||||
func (r *PostRepository) CreatePost(ctx context.Context, post *models.Post) error {
|
func (r *PostRepository) CreatePost(ctx context.Context, post *models.Post) error {
|
||||||
// Calculate confidence score if it's a beacon
|
// Calculate confidence score if it's a beacon
|
||||||
if post.IsBeacon {
|
if post.IsBeacon {
|
||||||
|
|
|
||||||
|
|
@ -729,7 +729,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
|
||||||
query := `
|
query := `
|
||||||
SELECT user_id, theme, language, notifications_enabled, email_notifications,
|
SELECT user_id, theme, language, notifications_enabled, email_notifications,
|
||||||
push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
|
push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
|
||||||
default_post_ttl, COALESCE(nsfw_enabled, FALSE), updated_at
|
default_post_ttl, COALESCE(nsfw_enabled, FALSE), COALESCE(nsfw_blur_enabled, TRUE), updated_at
|
||||||
FROM public.user_settings
|
FROM public.user_settings
|
||||||
WHERE user_id = $1::uuid
|
WHERE user_id = $1::uuid
|
||||||
`
|
`
|
||||||
|
|
@ -737,7 +737,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
|
||||||
err := r.pool.QueryRow(ctx, query, userID).Scan(
|
err := r.pool.QueryRow(ctx, query, userID).Scan(
|
||||||
&us.UserID, &us.Theme, &us.Language, &us.NotificationsEnabled, &us.EmailNotifications,
|
&us.UserID, &us.Theme, &us.Language, &us.NotificationsEnabled, &us.EmailNotifications,
|
||||||
&us.PushNotifications, &us.ContentFilterLevel, &us.AutoPlayVideos, &us.DataSaverMode,
|
&us.PushNotifications, &us.ContentFilterLevel, &us.AutoPlayVideos, &us.DataSaverMode,
|
||||||
&us.DefaultPostTtl, &us.NSFWEnabled, &us.UpdatedAt,
|
&us.DefaultPostTtl, &us.NSFWEnabled, &us.NSFWBlurEnabled, &us.UpdatedAt,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err.Error() == "no rows in result set" || err.Error() == "pgx: no rows in result set" {
|
if err.Error() == "no rows in result set" || err.Error() == "pgx: no rows in result set" {
|
||||||
|
|
@ -759,6 +759,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
|
||||||
AutoPlayVideos: &t,
|
AutoPlayVideos: &t,
|
||||||
DataSaverMode: &f,
|
DataSaverMode: &f,
|
||||||
NSFWEnabled: &f,
|
NSFWEnabled: &f,
|
||||||
|
NSFWBlurEnabled: &t,
|
||||||
UpdatedAt: time.Now(),
|
UpdatedAt: time.Now(),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
@ -772,8 +773,8 @@ func (r *UserRepository) UpdateUserSettings(ctx context.Context, us *models.User
|
||||||
INSERT INTO public.user_settings (
|
INSERT INTO public.user_settings (
|
||||||
user_id, theme, language, notifications_enabled, email_notifications,
|
user_id, theme, language, notifications_enabled, email_notifications,
|
||||||
push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
|
push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
|
||||||
default_post_ttl, nsfw_enabled, updated_at
|
default_post_ttl, nsfw_enabled, nsfw_blur_enabled, updated_at
|
||||||
) VALUES ($1::uuid, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NOW())
|
) VALUES ($1::uuid, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())
|
||||||
ON CONFLICT (user_id) DO UPDATE SET
|
ON CONFLICT (user_id) DO UPDATE SET
|
||||||
theme = COALESCE(EXCLUDED.theme, user_settings.theme),
|
theme = COALESCE(EXCLUDED.theme, user_settings.theme),
|
||||||
language = COALESCE(EXCLUDED.language, user_settings.language),
|
language = COALESCE(EXCLUDED.language, user_settings.language),
|
||||||
|
|
@ -785,12 +786,13 @@ func (r *UserRepository) UpdateUserSettings(ctx context.Context, us *models.User
|
||||||
data_saver_mode = COALESCE(EXCLUDED.data_saver_mode, user_settings.data_saver_mode),
|
data_saver_mode = COALESCE(EXCLUDED.data_saver_mode, user_settings.data_saver_mode),
|
||||||
default_post_ttl = COALESCE(EXCLUDED.default_post_ttl, user_settings.default_post_ttl),
|
default_post_ttl = COALESCE(EXCLUDED.default_post_ttl, user_settings.default_post_ttl),
|
||||||
nsfw_enabled = COALESCE(EXCLUDED.nsfw_enabled, user_settings.nsfw_enabled),
|
nsfw_enabled = COALESCE(EXCLUDED.nsfw_enabled, user_settings.nsfw_enabled),
|
||||||
|
nsfw_blur_enabled = COALESCE(EXCLUDED.nsfw_blur_enabled, user_settings.nsfw_blur_enabled),
|
||||||
updated_at = NOW()
|
updated_at = NOW()
|
||||||
`
|
`
|
||||||
_, err := r.pool.Exec(ctx, query,
|
_, err := r.pool.Exec(ctx, query,
|
||||||
us.UserID, us.Theme, us.Language, us.NotificationsEnabled, us.EmailNotifications,
|
us.UserID, us.Theme, us.Language, us.NotificationsEnabled, us.EmailNotifications,
|
||||||
us.PushNotifications, us.ContentFilterLevel, us.AutoPlayVideos, us.DataSaverMode,
|
us.PushNotifications, us.ContentFilterLevel, us.AutoPlayVideos, us.DataSaverMode,
|
||||||
us.DefaultPostTtl, us.NSFWEnabled,
|
us.DefaultPostTtl, us.NSFWEnabled, us.NSFWBlurEnabled,
|
||||||
)
|
)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -206,6 +206,32 @@ func (s *NotificationService) NotifyBeaconReport(ctx context.Context, beaconAuth
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NotifyNSFWWarning sends a warning when a post is auto-labeled as NSFW
|
||||||
|
func (s *NotificationService) NotifyNSFWWarning(ctx context.Context, authorID string, postID string) error {
|
||||||
|
authorUUID := uuid.MustParse(authorID)
|
||||||
|
return s.sendNotification(ctx, models.PushNotificationRequest{
|
||||||
|
UserID: authorUUID,
|
||||||
|
Type: models.NotificationTypeNSFWWarning,
|
||||||
|
ActorID: authorUUID, // system-generated, actor is self
|
||||||
|
PostID: uuidPtr(postID),
|
||||||
|
PostType: "standard",
|
||||||
|
Priority: models.PriorityHigh,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// NotifyContentRemoved sends a notification when content is removed by AI moderation
|
||||||
|
func (s *NotificationService) NotifyContentRemoved(ctx context.Context, authorID string, postID string) error {
|
||||||
|
authorUUID := uuid.MustParse(authorID)
|
||||||
|
return s.sendNotification(ctx, models.PushNotificationRequest{
|
||||||
|
UserID: authorUUID,
|
||||||
|
Type: models.NotificationTypeContentRemoved,
|
||||||
|
ActorID: authorUUID, // system-generated
|
||||||
|
PostID: uuidPtr(postID),
|
||||||
|
PostType: "standard",
|
||||||
|
Priority: models.PriorityUrgent,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Core Send Logic
|
// Core Send Logic
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
@ -402,6 +428,16 @@ func (s *NotificationService) buildPushPayload(req models.PushNotificationReques
|
||||||
body = fmt.Sprintf("%s reacted to your quip", actorName)
|
body = fmt.Sprintf("%s reacted to your quip", actorName)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case models.NotificationTypeNSFWWarning:
|
||||||
|
title = "Content Labeled as Sensitive"
|
||||||
|
body = "Your post was automatically labeled as NSFW. Please label sensitive content when posting to avoid further action."
|
||||||
|
data["target"] = "main_feed"
|
||||||
|
|
||||||
|
case models.NotificationTypeContentRemoved:
|
||||||
|
title = "Content Removed"
|
||||||
|
body = "Your post was removed for violating community guidelines. You can appeal this decision in your profile settings."
|
||||||
|
data["target"] = "profile_settings"
|
||||||
|
|
||||||
default:
|
default:
|
||||||
title = "Sojorn"
|
title = "Sojorn"
|
||||||
body = "You have a new notification"
|
body = "You have a new notification"
|
||||||
|
|
|
||||||
|
|
@ -414,18 +414,66 @@ const defaultModerationSystemPrompt = `You are a content moderation AI for Sojor
|
||||||
Analyze the provided content and decide one of three actions:
|
Analyze the provided content and decide one of three actions:
|
||||||
|
|
||||||
1. "clean" — Content is appropriate for all users. No issues.
|
1. "clean" — Content is appropriate for all users. No issues.
|
||||||
2. "nsfw" — Content is NOT illegal or bannable, but is mature/sensitive. Examples: mild violence, suggestive (but not explicit) imagery, dark humor, intense themes, horror content, heated political speech, depictions of alcohol/smoking. This content will be blurred with a warning label so users who opted in can choose to view it.
|
2. "nsfw" — Content is mature/sensitive but ALLOWED on the platform. It will be blurred behind a warning label for users who have opted in. Think "Cinemax late night" — permissive but not extreme.
|
||||||
3. "flag" — Content violates platform policy and should be reviewed by moderators. Examples: explicit nudity/pornography, graphic gore, illegal activity, credible threats, child exploitation, hard drug use instructions, doxxing, extreme hate speech.
|
3. "flag" — Content is NOT ALLOWED and will be removed. The user will receive an appeal notice.
|
||||||
|
|
||||||
When unsure, prefer "nsfw" over "flag" — only flag content you believe is clearly illegal or extremely graphic.
|
═══════════════════════════════════════════
|
||||||
|
NUDITY & SEXUAL CONTENT RULES (Cinemax Rule)
|
||||||
|
═══════════════════════════════════════════
|
||||||
|
NSFW (allowed, blurred):
|
||||||
|
- Partial or full nudity (breasts, buttocks, genitalia visible)
|
||||||
|
- Suggestive or sensual poses, lingerie, implied sexual situations
|
||||||
|
- Artistic nude photography, figure drawing, body-positive content
|
||||||
|
- Breastfeeding, non-sexual nudity in natural contexts
|
||||||
|
|
||||||
|
NOT ALLOWED (flag):
|
||||||
|
- Explicit sexual intercourse (penetration, oral sex, any sex acts)
|
||||||
|
- Hardcore pornography of any kind
|
||||||
|
- Any sexual content involving minors (ZERO TOLERANCE — always flag)
|
||||||
|
- Non-consensual sexual content, revenge porn
|
||||||
|
- Bestiality
|
||||||
|
|
||||||
|
═══════════════════════════════════════════
|
||||||
|
VIOLENCE RULES (1-10 Scale)
|
||||||
|
═══════════════════════════════════════════
|
||||||
|
Rate the violence level on a 1-10 scale in your explanation:
|
||||||
|
1-3: Mild (arguments, shoving, cartoon violence) → "clean"
|
||||||
|
4-5: Moderate (blood from injuries, protest footage with blood, boxing/MMA, hunting) → "nsfw"
|
||||||
|
6-7: Graphic (open wounds, significant bloodshed, war footage) → "flag"
|
||||||
|
8-10: Extreme (torture, dismemberment, gore, execution) → "flag"
|
||||||
|
|
||||||
|
Only violence rated 5 or below is allowed. 6+ is always flagged and removed.
|
||||||
|
Protest footage showing blood or injuries = NSFW (4-5), NOT flagged.
|
||||||
|
|
||||||
|
═══════════════════════════════════════════
|
||||||
|
OTHER CONTENT RULES
|
||||||
|
═══════════════════════════════════════════
|
||||||
|
NSFW (allowed, blurred):
|
||||||
|
- Dark humor, edgy memes, intense themes
|
||||||
|
- Horror content, gore in fiction/movies (≤5 on violence scale)
|
||||||
|
- Drug/alcohol references, smoking imagery
|
||||||
|
- Heated political speech, strong profanity
|
||||||
|
- Depictions of self-harm recovery (educational/supportive context)
|
||||||
|
|
||||||
|
NOT ALLOWED (flag):
|
||||||
|
- Credible threats of violence against real people
|
||||||
|
- Doxxing (sharing private info to harass)
|
||||||
|
- Illegal activity instructions (bomb-making, drug synthesis)
|
||||||
|
- Extreme hate speech targeting protected groups
|
||||||
|
- Spam/scam content designed to defraud users
|
||||||
|
- Dangerous medical misinformation that could cause harm
|
||||||
|
- Deepfakes designed to deceive or defame
|
||||||
|
|
||||||
|
When unsure between clean and nsfw, prefer "nsfw" (better safe, user sees it blurred).
|
||||||
|
When unsure between nsfw and flag, prefer "nsfw" — only flag content that clearly crosses the lines above.
|
||||||
|
|
||||||
Respond ONLY with a JSON object in this exact format:
|
Respond ONLY with a JSON object in this exact format:
|
||||||
{
|
{
|
||||||
"action": "clean" or "nsfw" or "flag",
|
"action": "clean" or "nsfw" or "flag",
|
||||||
"nsfw_reason": "If action is nsfw, a short label users will see: e.g. 'Violence', 'Suggestive Content', '18+ Themes', 'Gore', 'Drug References'. Empty string if clean or flag.",
|
"nsfw_reason": "If action is nsfw, a short label: e.g. 'Nudity', 'Violence', 'Suggestive Content', '18+ Themes', 'Gore', 'Drug References'. Empty string if clean or flag.",
|
||||||
"flagged": true/false,
|
"flagged": true/false,
|
||||||
"reason": "one-line summary if flagged or nsfw, empty string if clean",
|
"reason": "one-line summary if flagged or nsfw, empty string if clean",
|
||||||
"explanation": "Detailed paragraph explaining your full analysis and why you chose this action.",
|
"explanation": "Detailed paragraph explaining your analysis. For violence, include your 1-10 rating. For nudity, explain what is shown and why it does or does not cross the intercourse line.",
|
||||||
"hate": 0.0-1.0,
|
"hate": 0.0-1.0,
|
||||||
"hate_detail": "What you found or didn't find related to hate/violence/sexual content.",
|
"hate_detail": "What you found or didn't find related to hate/violence/sexual content.",
|
||||||
"greed": 0.0-1.0,
|
"greed": 0.0-1.0,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue