feat: full NSFW system - Cinemax rules, auto-reclassify with warning, not-allowed removal with appeal email, blur toggle setting, user self-labeling
parent 68dd8d3544
commit 27b48128fe
@@ -2,6 +2,7 @@ package handlers

 import (
+	"context"
 	"fmt"
 	"net/http"
 	"strings"
 	"time"
@@ -209,6 +210,8 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 		BeaconLat  *float64 `json:"beacon_lat"`
 		BeaconLong *float64 `json:"beacon_long"`
 		TTLHours   *int     `json:"ttl_hours"`
+		IsNSFW     bool     `json:"is_nsfw"`
+		NSFWReason string   `json:"nsfw_reason"`
 	}

 	if err := c.ShouldBindJSON(&req); err != nil {
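For illustration, a client self-labeling a post under the new fields might send a request body like this (is_nsfw and nsfw_reason come from the struct above; the other field names are assumptions about the rest of the request schema, and all values are hypothetical):

{
  "body": "Figure study from tonight's life-drawing session",
  "ttl_hours": 24,
  "is_nsfw": true,
  "nsfw_reason": "Artistic Nudity"
}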
@@ -293,6 +296,8 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 		AllowChain: allowChain,
 		Visibility: "public",
 		ExpiresAt:  expiresAt,
+		IsNSFW:     req.IsNSFW,
+		NSFWReason: req.NSFWReason,
 		Lat:        req.BeaconLat,
 		Long:       req.BeaconLong,
 	}
@@ -338,18 +343,24 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 	}

 	// 5b. OpenRouter AI Moderation — NSFW vs Flag decision
+	userSelfLabeledNSFW := req.IsNSFW
+	orDecision := ""
 	if h.openRouterService != nil {
 		orResult, orErr := h.openRouterService.ModerateText(c.Request.Context(), req.Body)
 		if orErr == nil && orResult != nil {
+			orDecision = orResult.Action
 			switch orResult.Action {
 			case "nsfw":
 				post.IsNSFW = true
 				if orResult.NSFWReason != "" {
 					post.NSFWReason = orResult.NSFWReason
 				}
 				if post.Status != "pending_moderation" {
 					post.Status = "active" // NSFW posts are active but blurred
 				}
 			case "flag":
-				post.Status = "pending_moderation"
+				// NOT ALLOWED — will be removed after creation
+				post.Status = "removed"
 			}
 			// Update CIS from OpenRouter scores if available
 			if orResult.Hate > 0 || orResult.Greed > 0 || orResult.Delusion > 0 {
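The OpenRouter service itself is not part of this diff; judging from the fields the handler dereferences above, its result type presumably looks something like this sketch (names inferred from usage here, not confirmed by the commit):

// Hypothetical shape of the moderation result consumed above. The handler
// reads Action ("clean"/"nsfw"/"flag"), NSFWReason, and the three-poisons scores.
type ModerationResult struct {
	Action     string
	NSFWReason string
	Hate       float64
	Greed      float64
	Delusion   float64
}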
@@ -367,8 +378,7 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 	}

 	// Handle Flags - Comprehensive Content Flagging
-	if h.moderationService != nil && post.Status == "pending_moderation" {
-		// Extract all media URLs for flagging
+	if h.moderationService != nil && (post.Status == "pending_moderation" || post.Status == "removed") {
 		mediaURLs := []string{}
 		if req.ImageURL != nil && *req.ImageURL != "" {
 			mediaURLs = append(mediaURLs, *req.ImageURL)
@@ -384,6 +394,64 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 		_ = h.moderationService.FlagPost(c.Request.Context(), post.ID, scores, reason)
 	}

+	// NSFW auto-reclassify: AI says NSFW but user didn't self-label → send warning
+	if post.IsNSFW && !userSelfLabeledNSFW && h.notificationService != nil {
+		go func() {
+			ctx := context.Background()
+			h.notificationService.NotifyNSFWWarning(ctx, userID.String(), post.ID.String())
+			log.Info().Str("post_id", post.ID.String()).Str("author_id", userID.String()).Msg("NSFW warning sent — post auto-labeled")
+		}()
+	}
+
+	// NOT ALLOWED: AI flagged → post removed, create violation, send appeal notification + email
+	if post.Status == "removed" && orDecision == "flag" {
+		go func() {
+			ctx := context.Background()
+
+			// Send in-app notification
+			if h.notificationService != nil {
+				h.notificationService.NotifyContentRemoved(ctx, userID.String(), post.ID.String())
+			}
+
+			// Create moderation violation record
+			if h.moderationService != nil {
+				h.moderationService.FlagPost(ctx, post.ID, &services.ThreePoisonsScore{Hate: 1.0}, "not_allowed")
+			}
+
+			// Send appeal email — get email from users table, display name from profiles
+			var userEmail string
+			h.postRepo.Pool().QueryRow(ctx, `SELECT email FROM users WHERE id = $1`, userID).Scan(&userEmail)
+			profile, _ := h.userRepo.GetProfileByID(ctx, userID.String())
+			if userEmail != "" {
+				displayName := "there"
+				if profile != nil && profile.DisplayName != nil {
+					displayName = *profile.DisplayName
+				}
+				snippet := req.Body
+				if len(snippet) > 100 {
+					snippet = snippet[:100] + "..."
+				}
+				appealBody := fmt.Sprintf(
+					"Hi %s,\n\n"+
+						"Your recent post on Sojorn was removed because it was found to violate our community guidelines.\n\n"+
+						"Post content: \"%s\"\n\n"+
+						"If you believe this was a mistake, you can appeal this decision in your Sojorn app:\n"+
+						"Go to Profile → Settings → Appeals\n\n"+
+						"Our moderation team will review your appeal within 48 hours.\n\n"+
+						"— The Sojorn Team",
+					displayName, snippet,
+				)
+				log.Info().Str("email", userEmail).Msg("Sending content removal appeal email")
+				h.postRepo.Pool().Exec(ctx,
+					`INSERT INTO email_queue (to_email, subject, body, created_at) VALUES ($1, $2, $3, NOW()) ON CONFLICT DO NOTHING`,
+					userEmail, "Your Sojorn post was removed", appealBody,
+				)
+			}
+
+			log.Warn().Str("post_id", post.ID.String()).Str("author_id", userID.String()).Msg("Post removed by AI moderation — not allowed content")
+		}()
+	}
+
 	// Log AI moderation decision to audit log
 	if h.moderationService != nil {
 		decision := "pass"
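One side note on the snippet truncation in the new block: snippet[:100] slices bytes, so a post beginning with multi-byte UTF-8 characters could be cut mid-character in the appeal email. A rune-safe variant (a sketch, not what the commit does) would be:

// Truncate on rune boundaries so multi-byte characters are never split.
snippet := req.Body
if runes := []rune(snippet); len(runes) > 100 {
	snippet = string(runes[:100]) + "..."
}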
@@ -391,7 +459,9 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 		if post.ToneLabel != nil && *post.ToneLabel != "" {
 			flagReason = *post.ToneLabel
 		}
-		if post.Status == "pending_moderation" {
+		if post.Status == "removed" {
+			decision = "flag"
+		} else if post.Status == "pending_moderation" {
 			decision = "flag"
 		} else if post.IsNSFW {
 			decision = "nsfw"
@@ -403,7 +473,7 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 		} else {
 			scores = &services.ThreePoisonsScore{}
 		}
-		h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, "", nil)
+		h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, orDecision, nil)
 	}

 	// Check for @mentions and notify mentioned users
@@ -22,6 +22,8 @@ const (
 	NotificationTypeBeaconReport   = "beacon_report"
 	NotificationTypeShare          = "share"
 	NotificationTypeQuipReaction   = "quip_reaction"
+	NotificationTypeNSFWWarning    = "nsfw_warning"
+	NotificationTypeContentRemoved = "content_removed"
 )

 // NotificationPriority constants
@@ -31,5 +31,6 @@ type UserSettings struct {
 	DataSaverMode   *bool     `json:"data_saver_mode" db:"data_saver_mode"`
 	DefaultPostTtl  *int      `json:"default_post_ttl" db:"default_post_ttl"`
 	NSFWEnabled     *bool     `json:"nsfw_enabled" db:"nsfw_enabled"`
+	NSFWBlurEnabled *bool     `json:"nsfw_blur_enabled" db:"nsfw_blur_enabled"`
 	UpdatedAt       time.Time `json:"updated_at" db:"updated_at"`
 }
@@ -19,6 +19,10 @@ func NewPostRepository(pool *pgxpool.Pool) *PostRepository {
 	return &PostRepository{pool: pool}
 }

+func (r *PostRepository) Pool() *pgxpool.Pool {
+	return r.pool
+}
+
 func (r *PostRepository) CreatePost(ctx context.Context, post *models.Post) error {
 	// Calculate confidence score if it's a beacon
 	if post.IsBeacon {
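Exposing the raw pgx pool gives handlers an escape hatch for one-off queries without adding a repository method for each; the appeal-email code above uses it exactly this way. A usage sketch:

// Ad-hoc read through the exposed pool, mirroring the handler usage above
// (the handler itself ignores the Scan error).
var email string
if err := postRepo.Pool().QueryRow(ctx, `SELECT email FROM users WHERE id = $1`, userID).Scan(&email); err != nil {
	log.Warn().Err(err).Msg("email lookup failed")
}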
@@ -729,7 +729,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
 	query := `
 		SELECT user_id, theme, language, notifications_enabled, email_notifications,
 		       push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
-		       default_post_ttl, COALESCE(nsfw_enabled, FALSE), updated_at
+		       default_post_ttl, COALESCE(nsfw_enabled, FALSE), COALESCE(nsfw_blur_enabled, TRUE), updated_at
 		FROM public.user_settings
 		WHERE user_id = $1::uuid
 	`
@@ -737,7 +737,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
 	err := r.pool.QueryRow(ctx, query, userID).Scan(
 		&us.UserID, &us.Theme, &us.Language, &us.NotificationsEnabled, &us.EmailNotifications,
 		&us.PushNotifications, &us.ContentFilterLevel, &us.AutoPlayVideos, &us.DataSaverMode,
-		&us.DefaultPostTtl, &us.NSFWEnabled, &us.UpdatedAt,
+		&us.DefaultPostTtl, &us.NSFWEnabled, &us.NSFWBlurEnabled, &us.UpdatedAt,
 	)
 	if err != nil {
 		if err.Error() == "no rows in result set" || err.Error() == "pgx: no rows in result set" {
@@ -759,6 +759,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
 		AutoPlayVideos:  &t,
 		DataSaverMode:   &f,
 		NSFWEnabled:     &f,
+		NSFWBlurEnabled: &t,
 		UpdatedAt:       time.Now(),
 	}, nil
 }
@@ -772,8 +773,8 @@ func (r *UserRepository) UpdateUserSettings(ctx context.Context, us *models.User
 	INSERT INTO public.user_settings (
 		user_id, theme, language, notifications_enabled, email_notifications,
 		push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
-		default_post_ttl, nsfw_enabled, updated_at
-	) VALUES ($1::uuid, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NOW())
+		default_post_ttl, nsfw_enabled, nsfw_blur_enabled, updated_at
+	) VALUES ($1::uuid, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())
 	ON CONFLICT (user_id) DO UPDATE SET
 		theme = COALESCE(EXCLUDED.theme, user_settings.theme),
 		language = COALESCE(EXCLUDED.language, user_settings.language),
@@ -785,12 +786,13 @@ func (r *UserRepository) UpdateUserSettings(ctx context.Context, us *models.User
 		data_saver_mode = COALESCE(EXCLUDED.data_saver_mode, user_settings.data_saver_mode),
 		default_post_ttl = COALESCE(EXCLUDED.default_post_ttl, user_settings.default_post_ttl),
 		nsfw_enabled = COALESCE(EXCLUDED.nsfw_enabled, user_settings.nsfw_enabled),
+		nsfw_blur_enabled = COALESCE(EXCLUDED.nsfw_blur_enabled, user_settings.nsfw_blur_enabled),
 		updated_at = NOW()
 	`
 	_, err := r.pool.Exec(ctx, query,
 		us.UserID, us.Theme, us.Language, us.NotificationsEnabled, us.EmailNotifications,
 		us.PushNotifications, us.ContentFilterLevel, us.AutoPlayVideos, us.DataSaverMode,
-		us.DefaultPostTtl, us.NSFWEnabled,
+		us.DefaultPostTtl, us.NSFWEnabled, us.NSFWBlurEnabled,
 	)
 	return err
 }
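Because every column in the DO UPDATE clause goes through COALESCE, a caller can change a single setting on an existing row by leaving all other pointer fields nil; the NULLs bound for those parameters fall back to the stored values. A usage sketch (assuming UserID is populated the same way as elsewhere in the repository):

// Toggle only the blur setting; nil pointers keep their existing stored values.
blur := false
err := repo.UpdateUserSettings(ctx, &models.UserSettings{
	UserID:          userID,
	NSFWBlurEnabled: &blur,
})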
@@ -206,6 +206,32 @@ func (s *NotificationService) NotifyBeaconReport(ctx context.Context, beaconAuth
 	})
 }

+// NotifyNSFWWarning sends a warning when a post is auto-labeled as NSFW
+func (s *NotificationService) NotifyNSFWWarning(ctx context.Context, authorID string, postID string) error {
+	authorUUID := uuid.MustParse(authorID)
+	return s.sendNotification(ctx, models.PushNotificationRequest{
+		UserID:   authorUUID,
+		Type:     models.NotificationTypeNSFWWarning,
+		ActorID:  authorUUID, // system-generated, actor is self
+		PostID:   uuidPtr(postID),
+		PostType: "standard",
+		Priority: models.PriorityHigh,
+	})
+}
+
+// NotifyContentRemoved sends a notification when content is removed by AI moderation
+func (s *NotificationService) NotifyContentRemoved(ctx context.Context, authorID string, postID string) error {
+	authorUUID := uuid.MustParse(authorID)
+	return s.sendNotification(ctx, models.PushNotificationRequest{
+		UserID:   authorUUID,
+		Type:     models.NotificationTypeContentRemoved,
+		ActorID:  authorUUID, // system-generated
+		PostID:   uuidPtr(postID),
+		PostType: "standard",
+		Priority: models.PriorityUrgent,
+	})
+}
+
 // ============================================================================
 // Core Send Logic
 // ============================================================================
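Both helpers rely on uuid.MustParse, which panics on malformed input. That is presumably safe here because authorID comes from an authenticated user ID upstream, but a non-panicking variant (a sketch, not what the commit does) would be:

// Return an error instead of panicking on a malformed author ID.
authorUUID, err := uuid.Parse(authorID)
if err != nil {
	return fmt.Errorf("invalid author id %q: %w", authorID, err)
}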
@@ -402,6 +428,16 @@ func (s *NotificationService) buildPushPayload(req models.PushNotificationReques
 		body = fmt.Sprintf("%s reacted to your quip", actorName)
 	}

+	case models.NotificationTypeNSFWWarning:
+		title = "Content Labeled as Sensitive"
+		body = "Your post was automatically labeled as NSFW. Please label sensitive content when posting to avoid further action."
+		data["target"] = "main_feed"
+
+	case models.NotificationTypeContentRemoved:
+		title = "Content Removed"
+		body = "Your post was removed for violating community guidelines. You can appeal this decision in your profile settings."
+		data["target"] = "profile_settings"
+
 	default:
 		title = "Sojorn"
 		body = "You have a new notification"
@@ -414,18 +414,66 @@ const defaultModerationSystemPrompt = `You are a content moderation AI for Sojor
 Analyze the provided content and decide one of three actions:

 1. "clean" — Content is appropriate for all users. No issues.
-2. "nsfw" — Content is NOT illegal or bannable, but is mature/sensitive. Examples: mild violence, suggestive (but not explicit) imagery, dark humor, intense themes, horror content, heated political speech, depictions of alcohol/smoking. This content will be blurred with a warning label so users who opted in can choose to view it.
-3. "flag" — Content violates platform policy and should be reviewed by moderators. Examples: explicit nudity/pornography, graphic gore, illegal activity, credible threats, child exploitation, hard drug use instructions, doxxing, extreme hate speech.
+2. "nsfw" — Content is mature/sensitive but ALLOWED on the platform. It will be blurred behind a warning label for users who have opted in. Think "Cinemax late night" — permissive but not extreme.
+3. "flag" — Content is NOT ALLOWED and will be removed. The user will receive an appeal notice.

-When unsure, prefer "nsfw" over "flag" — only flag content you believe is clearly illegal or extremely graphic.
+═══════════════════════════════════════════
+NUDITY & SEXUAL CONTENT RULES (Cinemax Rule)
+═══════════════════════════════════════════
+NSFW (allowed, blurred):
+- Partial or full nudity (breasts, buttocks, genitalia visible)
+- Suggestive or sensual poses, lingerie, implied sexual situations
+- Artistic nude photography, figure drawing, body-positive content
+- Breastfeeding, non-sexual nudity in natural contexts
+
+NOT ALLOWED (flag):
+- Explicit sexual intercourse (penetration, oral sex, any sex acts)
+- Hardcore pornography of any kind
+- Any sexual content involving minors (ZERO TOLERANCE — always flag)
+- Non-consensual sexual content, revenge porn
+- Bestiality
+
+═══════════════════════════════════════════
+VIOLENCE RULES (1-10 Scale)
+═══════════════════════════════════════════
+Rate the violence level on a 1-10 scale in your explanation:
+1-3: Mild (arguments, shoving, cartoon violence) → "clean"
+4-5: Moderate (blood from injuries, protest footage with blood, boxing/MMA, hunting) → "nsfw"
+6-7: Graphic (open wounds, significant bloodshed, war footage) → "flag"
+8-10: Extreme (torture, dismemberment, gore, execution) → "flag"
+
+Only violence rated 5 or below is allowed. 6+ is always flagged and removed.
+Protest footage showing blood or injuries = NSFW (4-5), NOT flagged.
+
+═══════════════════════════════════════════
+OTHER CONTENT RULES
+═══════════════════════════════════════════
+NSFW (allowed, blurred):
+- Dark humor, edgy memes, intense themes
+- Horror content, gore in fiction/movies (≤5 on violence scale)
+- Drug/alcohol references, smoking imagery
+- Heated political speech, strong profanity
+- Depictions of self-harm recovery (educational/supportive context)
+
+NOT ALLOWED (flag):
+- Credible threats of violence against real people
+- Doxxing (sharing private info to harass)
+- Illegal activity instructions (bomb-making, drug synthesis)
+- Extreme hate speech targeting protected groups
+- Spam/scam content designed to defraud users
+- Dangerous medical misinformation that could cause harm
+- Deepfakes designed to deceive or defame
+
+When unsure between clean and nsfw, prefer "nsfw" (better safe, user sees it blurred).
+When unsure between nsfw and flag, prefer "nsfw" — only flag content that clearly crosses the lines above.

 Respond ONLY with a JSON object in this exact format:
 {
   "action": "clean" or "nsfw" or "flag",
-  "nsfw_reason": "If action is nsfw, a short label users will see: e.g. 'Violence', 'Suggestive Content', '18+ Themes', 'Gore', 'Drug References'. Empty string if clean or flag.",
+  "nsfw_reason": "If action is nsfw, a short label: e.g. 'Nudity', 'Violence', 'Suggestive Content', '18+ Themes', 'Gore', 'Drug References'. Empty string if clean or flag.",
   "flagged": true/false,
   "reason": "one-line summary if flagged or nsfw, empty string if clean",
-  "explanation": "Detailed paragraph explaining your full analysis and why you chose this action.",
+  "explanation": "Detailed paragraph explaining your analysis. For violence, include your 1-10 rating. For nudity, explain what is shown and why it does or does not cross the intercourse line.",
   "hate": 0.0-1.0,
   "hate_detail": "What you found or didn't find related to hate/violence/sexual content.",
   "greed": 0.0-1.0,
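For illustration, a response following this format for, say, protest footage with visible injuries might look like the following (values are hypothetical, and the fields after "greed" are omitted here because the hunk is truncated at that point):

{
  "action": "nsfw",
  "nsfw_reason": "Violence",
  "flagged": false,
  "reason": "Moderate real-world violence",
  "explanation": "The footage shows protesters with bleeding injuries but no graphic wounds; I rate the violence 4/10, which falls in the nsfw band, so the post is blurred rather than removed.",
  "hate": 0.3,
  "hate_detail": "Real-world violence present; no hate speech or sexual content found.",
  "greed": 0.0
}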