feat: full NSFW system - Cinemax rules, auto-reclassify with warning, not-allowed removal with appeal email, blur toggle setting, user self-labeling

Patrick Britton 2026-02-07 16:58:57 -06:00
parent 68dd8d3544
commit 27b48128fe
7 changed files with 192 additions and 29 deletions

View file

@@ -2,6 +2,7 @@ package handlers
 import (
     "context"
+    "fmt"
     "net/http"
     "strings"
     "time"
@@ -209,6 +210,8 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
     BeaconLat  *float64 `json:"beacon_lat"`
     BeaconLong *float64 `json:"beacon_long"`
     TTLHours   *int     `json:"ttl_hours"`
+    IsNSFW     bool     `json:"is_nsfw"`
+    NSFWReason string   `json:"nsfw_reason"`
 }
 if err := c.ShouldBindJSON(&req); err != nil {
@@ -293,6 +296,8 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
     AllowChain: allowChain,
     Visibility: "public",
     ExpiresAt:  expiresAt,
+    IsNSFW:     req.IsNSFW,
+    NSFWReason: req.NSFWReason,
     Lat:        req.BeaconLat,
     Long:       req.BeaconLong,
 }
@@ -338,18 +343,24 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 }
 // 5b. OpenRouter AI Moderation — NSFW vs Flag decision
+userSelfLabeledNSFW := req.IsNSFW
+orDecision := ""
 if h.openRouterService != nil {
     orResult, orErr := h.openRouterService.ModerateText(c.Request.Context(), req.Body)
     if orErr == nil && orResult != nil {
+        orDecision = orResult.Action
         switch orResult.Action {
         case "nsfw":
             post.IsNSFW = true
+            if orResult.NSFWReason != "" {
                 post.NSFWReason = orResult.NSFWReason
+            }
             if post.Status != "pending_moderation" {
                 post.Status = "active" // NSFW posts are active but blurred
             }
         case "flag":
-            post.Status = "pending_moderation"
+            // NOT ALLOWED — will be removed after creation
+            post.Status = "removed"
         }
         // Update CIS from OpenRouter scores if available
         if orResult.Hate > 0 || orResult.Greed > 0 || orResult.Delusion > 0 {
@@ -367,8 +378,7 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 }
 // Handle Flags - Comprehensive Content Flagging
-if h.moderationService != nil && post.Status == "pending_moderation" {
-    // Extract all media URLs for flagging
+if h.moderationService != nil && (post.Status == "pending_moderation" || post.Status == "removed") {
     mediaURLs := []string{}
     if req.ImageURL != nil && *req.ImageURL != "" {
         mediaURLs = append(mediaURLs, *req.ImageURL)
@@ -384,6 +394,64 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
     _ = h.moderationService.FlagPost(c.Request.Context(), post.ID, scores, reason)
 }
+// NSFW auto-reclassify: AI says NSFW but user didn't self-label → send warning
+if post.IsNSFW && !userSelfLabeledNSFW && h.notificationService != nil {
+    go func() {
+        ctx := context.Background()
+        h.notificationService.NotifyNSFWWarning(ctx, userID.String(), post.ID.String())
+        log.Info().Str("post_id", post.ID.String()).Str("author_id", userID.String()).Msg("NSFW warning sent — post auto-labeled")
+    }()
+}
+// NOT ALLOWED: AI flagged → post removed, create violation, send appeal notification + email
+if post.Status == "removed" && orDecision == "flag" {
+    go func() {
+        ctx := context.Background()
+        // Send in-app notification
+        if h.notificationService != nil {
+            h.notificationService.NotifyContentRemoved(ctx, userID.String(), post.ID.String())
+        }
+        // Create moderation violation record
+        if h.moderationService != nil {
+            h.moderationService.FlagPost(ctx, post.ID, &services.ThreePoisonsScore{Hate: 1.0}, "not_allowed")
+        }
+        // Send appeal email — get email from users table, display name from profiles
+        var userEmail string
+        h.postRepo.Pool().QueryRow(ctx, `SELECT email FROM users WHERE id = $1`, userID).Scan(&userEmail)
+        profile, _ := h.userRepo.GetProfileByID(ctx, userID.String())
+        if userEmail != "" {
+            displayName := "there"
+            if profile != nil && profile.DisplayName != nil {
+                displayName = *profile.DisplayName
+            }
+            snippet := req.Body
+            if len(snippet) > 100 {
+                snippet = snippet[:100] + "..."
+            }
+            appealBody := fmt.Sprintf(
+                "Hi %s,\n\n"+
+                    "Your recent post on Sojorn was removed because it was found to violate our community guidelines.\n\n"+
+                    "Post content: \"%s\"\n\n"+
+                    "If you believe this was a mistake, you can appeal this decision in your Sojorn app:\n"+
+                    "Go to Profile → Settings → Appeals\n\n"+
+                    "Our moderation team will review your appeal within 48 hours.\n\n"+
+                    "— The Sojorn Team",
+                displayName, snippet,
+            )
+            log.Info().Str("email", userEmail).Msg("Sending content removal appeal email")
+            h.postRepo.Pool().Exec(ctx,
+                `INSERT INTO email_queue (to_email, subject, body, created_at) VALUES ($1, $2, $3, NOW()) ON CONFLICT DO NOTHING`,
+                userEmail, "Your Sojorn post was removed", appealBody,
+            )
+        }
+        log.Warn().Str("post_id", post.ID.String()).Str("author_id", userID.String()).Msg("Post removed by AI moderation — not allowed content")
+    }()
+}
 // Log AI moderation decision to audit log
 if h.moderationService != nil {
     decision := "pass"
@@ -391,7 +459,9 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
     if post.ToneLabel != nil && *post.ToneLabel != "" {
         flagReason = *post.ToneLabel
     }
-    if post.Status == "pending_moderation" {
+    if post.Status == "removed" {
+        decision = "flag"
+    } else if post.Status == "pending_moderation" {
         decision = "flag"
     } else if post.IsNSFW {
         decision = "nsfw"
@@ -403,7 +473,7 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
     } else {
         scores = &services.ThreePoisonsScore{}
     }
-    h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, "", nil)
+    h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, orDecision, nil)
 }

 // Check for @mentions and notify mentioned users
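
Taken together, the handler now resolves one of three outcomes: clean posts stay active, NSFW posts stay active but blurred, and flagged posts are removed with an appeal path. A condensed sketch of that decision matrix, assuming post.Status starts as "active" unless an earlier filter already set "pending_moderation" (the helper name and simplified signature are illustrative, not part of this commit):

// Illustrative distillation of the CreatePost moderation logic above;
// not actual project code. status is the post status before the
// OpenRouter decision is applied.
func resolveModeration(status, aiAction string, selfLabeled bool) (newStatus string, nsfw, warnAuthor, removed bool) {
    newStatus, nsfw = status, selfLabeled
    switch aiAction {
    case "nsfw":
        nsfw = true
        if status != "pending_moderation" {
            newStatus = "active" // visible, but blurred client-side
        }
        warnAuthor = !selfLabeled // auto-reclassified: author gets an NSFW warning
    case "flag":
        newStatus = "removed" // not allowed: appeal notification + email follow
        removed = true
    }
    return
}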

View file

@@ -22,6 +22,8 @@ const (
     NotificationTypeBeaconReport   = "beacon_report"
     NotificationTypeShare          = "share"
     NotificationTypeQuipReaction   = "quip_reaction"
+    NotificationTypeNSFWWarning    = "nsfw_warning"
+    NotificationTypeContentRemoved = "content_removed"
 )

 // NotificationPriority constants

View file

@@ -31,5 +31,6 @@ type UserSettings struct {
     DataSaverMode   *bool     `json:"data_saver_mode" db:"data_saver_mode"`
     DefaultPostTtl  *int      `json:"default_post_ttl" db:"default_post_ttl"`
     NSFWEnabled     *bool     `json:"nsfw_enabled" db:"nsfw_enabled"`
+    NSFWBlurEnabled *bool     `json:"nsfw_blur_enabled" db:"nsfw_blur_enabled"`
     UpdatedAt       time.Time `json:"updated_at" db:"updated_at"`
 }
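
Client-side, the two settings are intended to compose: nsfw_enabled gates whether NSFW posts are shown at all (the repository below defaults it to FALSE), while nsfw_blur_enabled controls whether shown NSFW posts start blurred (default TRUE). A hedged sketch of that gate; the exact composition is an assumption drawn from the field names and defaults, not something this diff spells out:

// Assumed viewer-side gate; not part of this commit.
func nsfwVisibility(postIsNSFW bool, s models.UserSettings) (show, blur bool) {
    if !postIsNSFW {
        return true, false
    }
    show = s.NSFWEnabled != nil && *s.NSFWEnabled         // opt-in required
    blur = s.NSFWBlurEnabled == nil || *s.NSFWBlurEnabled // blur defaults on
    return
}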

View file

@@ -19,6 +19,10 @@ func NewPostRepository(pool *pgxpool.Pool) *PostRepository {
     return &PostRepository{pool: pool}
 }

+func (r *PostRepository) Pool() *pgxpool.Pool {
+    return r.pool
+}
+
 func (r *PostRepository) CreatePost(ctx context.Context, post *models.Post) error {
     // Calculate confidence score if it's a beacon
     if post.IsBeacon {
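
The new Pool() accessor exposes the underlying pgxpool so handlers can run one-off queries against tables the repository does not model; the email_queue insert in the post handler above relies on it. A minimal sketch of that pattern (the helper name is hypothetical):

// Hypothetical helper illustrating the ad-hoc query pattern Pool() enables.
func queueEmail(ctx context.Context, repo *PostRepository, to, subject, body string) error {
    _, err := repo.Pool().Exec(ctx,
        `INSERT INTO email_queue (to_email, subject, body, created_at)
         VALUES ($1, $2, $3, NOW()) ON CONFLICT DO NOTHING`,
        to, subject, body)
    return err
}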

View file

@@ -729,7 +729,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
     query := `
         SELECT user_id, theme, language, notifications_enabled, email_notifications,
                push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
-               default_post_ttl, COALESCE(nsfw_enabled, FALSE), updated_at
+               default_post_ttl, COALESCE(nsfw_enabled, FALSE), COALESCE(nsfw_blur_enabled, TRUE), updated_at
         FROM public.user_settings
         WHERE user_id = $1::uuid
     `
@@ -737,7 +737,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
     err := r.pool.QueryRow(ctx, query, userID).Scan(
         &us.UserID, &us.Theme, &us.Language, &us.NotificationsEnabled, &us.EmailNotifications,
         &us.PushNotifications, &us.ContentFilterLevel, &us.AutoPlayVideos, &us.DataSaverMode,
-        &us.DefaultPostTtl, &us.NSFWEnabled, &us.UpdatedAt,
+        &us.DefaultPostTtl, &us.NSFWEnabled, &us.NSFWBlurEnabled, &us.UpdatedAt,
     )
     if err != nil {
         if err.Error() == "no rows in result set" || err.Error() == "pgx: no rows in result set" {
@@ -759,6 +759,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
             AutoPlayVideos:  &t,
             DataSaverMode:   &f,
             NSFWEnabled:     &f,
+            NSFWBlurEnabled: &t,
             UpdatedAt:       time.Now(),
         }, nil
     }
@@ -772,8 +773,8 @@ func (r *UserRepository) UpdateUserSettings(ctx context.Context, us *models.User
         INSERT INTO public.user_settings (
             user_id, theme, language, notifications_enabled, email_notifications,
             push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
-            default_post_ttl, nsfw_enabled, updated_at
-        ) VALUES ($1::uuid, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NOW())
+            default_post_ttl, nsfw_enabled, nsfw_blur_enabled, updated_at
+        ) VALUES ($1::uuid, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())
         ON CONFLICT (user_id) DO UPDATE SET
             theme = COALESCE(EXCLUDED.theme, user_settings.theme),
             language = COALESCE(EXCLUDED.language, user_settings.language),
@@ -785,12 +786,13 @@ func (r *UserRepository) UpdateUserSettings(ctx context.Context, us *models.User
             data_saver_mode = COALESCE(EXCLUDED.data_saver_mode, user_settings.data_saver_mode),
             default_post_ttl = COALESCE(EXCLUDED.default_post_ttl, user_settings.default_post_ttl),
             nsfw_enabled = COALESCE(EXCLUDED.nsfw_enabled, user_settings.nsfw_enabled),
+            nsfw_blur_enabled = COALESCE(EXCLUDED.nsfw_blur_enabled, user_settings.nsfw_blur_enabled),
             updated_at = NOW()
     `
     _, err := r.pool.Exec(ctx, query,
         us.UserID, us.Theme, us.Language, us.NotificationsEnabled, us.EmailNotifications,
         us.PushNotifications, us.ContentFilterLevel, us.AutoPlayVideos, us.DataSaverMode,
-        us.DefaultPostTtl, us.NSFWEnabled,
+        us.DefaultPostTtl, us.NSFWEnabled, us.NSFWBlurEnabled,
     )
     return err
 }
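
Because every column in the upsert goes through COALESCE, a caller can change one setting by populating only that field; nil fields keep their stored values. A sketch of toggling just the blur setting under that reading (illustrative call, not project code):

// Flip only nsfw_blur_enabled; every other field is nil and therefore
// preserved by the COALESCE clauses above.
blur := false
err := userRepo.UpdateUserSettings(ctx, &models.UserSettings{
    UserID:          userID,
    NSFWBlurEnabled: &blur,
})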

View file

@@ -206,6 +206,32 @@ func (s *NotificationService) NotifyBeaconReport(ctx context.Context, beaconAuth
     })
 }

+// NotifyNSFWWarning sends a warning when a post is auto-labeled as NSFW
+func (s *NotificationService) NotifyNSFWWarning(ctx context.Context, authorID string, postID string) error {
+    authorUUID := uuid.MustParse(authorID)
+    return s.sendNotification(ctx, models.PushNotificationRequest{
+        UserID:   authorUUID,
+        Type:     models.NotificationTypeNSFWWarning,
+        ActorID:  authorUUID, // system-generated, actor is self
+        PostID:   uuidPtr(postID),
+        PostType: "standard",
+        Priority: models.PriorityHigh,
+    })
+}
+
+// NotifyContentRemoved sends a notification when content is removed by AI moderation
+func (s *NotificationService) NotifyContentRemoved(ctx context.Context, authorID string, postID string) error {
+    authorUUID := uuid.MustParse(authorID)
+    return s.sendNotification(ctx, models.PushNotificationRequest{
+        UserID:   authorUUID,
+        Type:     models.NotificationTypeContentRemoved,
+        ActorID:  authorUUID, // system-generated
+        PostID:   uuidPtr(postID),
+        PostType: "standard",
+        Priority: models.PriorityUrgent,
+    })
+}
+
 // ============================================================================
 // Core Send Logic
 // ============================================================================
@@ -402,6 +428,16 @@ func (s *NotificationService) buildPushPayload(req models.PushNotificationReques
         body = fmt.Sprintf("%s reacted to your quip", actorName)
     }
+case models.NotificationTypeNSFWWarning:
+    title = "Content Labeled as Sensitive"
+    body = "Your post was automatically labeled as NSFW. Please label sensitive content when posting to avoid further action."
+    data["target"] = "main_feed"
+case models.NotificationTypeContentRemoved:
+    title = "Content Removed"
+    body = "Your post was removed for violating community guidelines. You can appeal this decision in your profile settings."
+    data["target"] = "profile_settings"
 default:
     title = "Sojorn"
     body = "You have a new notification"

View file

@@ -414,18 +414,66 @@ const defaultModerationSystemPrompt = `You are a content moderation AI for Sojor
 Analyze the provided content and decide one of three actions:

 1. "clean" → Content is appropriate for all users. No issues.
-2. "nsfw" → Content is NOT illegal or bannable, but is mature/sensitive. Examples: mild violence, suggestive (but not explicit) imagery, dark humor, intense themes, horror content, heated political speech, depictions of alcohol/smoking. This content will be blurred with a warning label so users who opted in can choose to view it.
-3. "flag" → Content violates platform policy and should be reviewed by moderators. Examples: explicit nudity/pornography, graphic gore, illegal activity, credible threats, child exploitation, hard drug use instructions, doxxing, extreme hate speech.
-
-When unsure, prefer "nsfw" over "flag"; only flag content you believe is clearly illegal or extremely graphic.
+2. "nsfw" → Content is mature/sensitive but ALLOWED on the platform. It will be blurred behind a warning label for users who have opted in. Think "Cinemax late night": permissive but not extreme.
+3. "flag" → Content is NOT ALLOWED and will be removed. The user will receive an appeal notice.
+
+NUDITY & SEXUAL CONTENT RULES (Cinemax Rule)
+NSFW (allowed, blurred):
+- Partial or full nudity (breasts, buttocks, genitalia visible)
+- Suggestive or sensual poses, lingerie, implied sexual situations
+- Artistic nude photography, figure drawing, body-positive content
+- Breastfeeding, non-sexual nudity in natural contexts
+
+NOT ALLOWED (flag):
+- Explicit sexual intercourse (penetration, oral sex, any sex acts)
+- Hardcore pornography of any kind
+- Any sexual content involving minors (ZERO TOLERANCE → always flag)
+- Non-consensual sexual content, revenge porn
+- Bestiality
+
+VIOLENCE RULES (1-10 Scale)
+Rate the violence level on a 1-10 scale in your explanation:
+1-3: Mild (arguments, shoving, cartoon violence) → "clean"
+4-5: Moderate (blood from injuries, protest footage with blood, boxing/MMA, hunting) → "nsfw"
+6-7: Graphic (open wounds, significant bloodshed, war footage) → "flag"
+8-10: Extreme (torture, dismemberment, gore, execution) → "flag"
+Only violence rated 5 or below is allowed. 6+ is always flagged and removed.
+Protest footage showing blood or injuries = NSFW (4-5), NOT flagged.
+
+OTHER CONTENT RULES
+NSFW (allowed, blurred):
+- Dark humor, edgy memes, intense themes
+- Horror content, gore in fiction/movies (≤5 on violence scale)
+- Drug/alcohol references, smoking imagery
+- Heated political speech, strong profanity
+- Depictions of self-harm recovery (educational/supportive context)
+
+NOT ALLOWED (flag):
+- Credible threats of violence against real people
+- Doxxing (sharing private info to harass)
+- Illegal activity instructions (bomb-making, drug synthesis)
+- Extreme hate speech targeting protected groups
+- Spam/scam content designed to defraud users
+- Dangerous medical misinformation that could cause harm
+- Deepfakes designed to deceive or defame
+
+When unsure between clean and nsfw, prefer "nsfw" (better safe, user sees it blurred).
+When unsure between nsfw and flag, prefer "nsfw"; only flag content that clearly crosses the lines above.
 Respond ONLY with a JSON object in this exact format:
 {
   "action": "clean" or "nsfw" or "flag",
-  "nsfw_reason": "If action is nsfw, a short label users will see: e.g. 'Violence', 'Suggestive Content', '18+ Themes', 'Gore', 'Drug References'. Empty string if clean or flag.",
+  "nsfw_reason": "If action is nsfw, a short label: e.g. 'Nudity', 'Violence', 'Suggestive Content', '18+ Themes', 'Gore', 'Drug References'. Empty string if clean or flag.",
   "flagged": true/false,
   "reason": "one-line summary if flagged or nsfw, empty string if clean",
-  "explanation": "Detailed paragraph explaining your full analysis and why you chose this action.",
+  "explanation": "Detailed paragraph explaining your analysis. For violence, include your 1-10 rating. For nudity, explain what is shown and why it does or does not cross the intercourse line.",
   "hate": 0.0-1.0,
   "hate_detail": "What you found or didn't find related to hate/violence/sexual content.",
   "greed": 0.0-1.0,