diff --git a/go-backend/internal/handlers/post_handler.go b/go-backend/internal/handlers/post_handler.go
index 7544879..a7e1236 100644
--- a/go-backend/internal/handlers/post_handler.go
+++ b/go-backend/internal/handlers/post_handler.go
@@ -2,6 +2,7 @@ package handlers
 
 import (
     "context"
+    "fmt"
     "net/http"
     "strings"
     "time"
@@ -209,6 +210,8 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
        BeaconLat   *float64 `json:"beacon_lat"`
        BeaconLong  *float64 `json:"beacon_long"`
        TTLHours    *int     `json:"ttl_hours"`
+       IsNSFW      bool     `json:"is_nsfw"`
+       NSFWReason  string   `json:"nsfw_reason"`
    }
 
    if err := c.ShouldBindJSON(&req); err != nil {
@@ -293,6 +296,8 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
        AllowChain: allowChain,
        Visibility: "public",
        ExpiresAt:  expiresAt,
+       IsNSFW:     req.IsNSFW,
+       NSFWReason: req.NSFWReason,
        Lat:        req.BeaconLat,
        Long:       req.BeaconLong,
    }
@@ -338,18 +343,24 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
    }
 
    // 5b. OpenRouter AI Moderation — NSFW vs Flag decision
+   userSelfLabeledNSFW := req.IsNSFW
+   orDecision := ""
    if h.openRouterService != nil {
        orResult, orErr := h.openRouterService.ModerateText(c.Request.Context(), req.Body)
        if orErr == nil && orResult != nil {
+           orDecision = orResult.Action
            switch orResult.Action {
            case "nsfw":
                post.IsNSFW = true
-               post.NSFWReason = orResult.NSFWReason
+               if orResult.NSFWReason != "" {
+                   post.NSFWReason = orResult.NSFWReason
+               }
                if post.Status != "pending_moderation" {
                    post.Status = "active" // NSFW posts are active but blurred
                }
            case "flag":
-               post.Status = "pending_moderation"
+               // NOT ALLOWED — will be removed after creation
+               post.Status = "removed"
            }
            // Update CIS from OpenRouter scores if available
            if orResult.Hate > 0 || orResult.Greed > 0 || orResult.Delusion > 0 {
@@ -367,8 +378,7 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
    }
 
    // Handle Flags - Comprehensive Content Flagging
-   if h.moderationService != nil && post.Status == "pending_moderation" {
-       // Extract all media URLs for flagging
+   if h.moderationService != nil && (post.Status == "pending_moderation" || post.Status == "removed") {
        mediaURLs := []string{}
        if req.ImageURL != nil && *req.ImageURL != "" {
            mediaURLs = append(mediaURLs, *req.ImageURL)
@@ -384,6 +394,64 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
        _ = h.moderationService.FlagPost(c.Request.Context(), post.ID, scores, reason)
    }
 
+   // NSFW auto-reclassify: AI says NSFW but user didn't self-label → send warning
+   if post.IsNSFW && !userSelfLabeledNSFW && h.notificationService != nil {
+       go func() {
+           ctx := context.Background()
+           h.notificationService.NotifyNSFWWarning(ctx, userID.String(), post.ID.String())
+           log.Info().Str("post_id", post.ID.String()).Str("author_id", userID.String()).Msg("NSFW warning sent — post auto-labeled")
+       }()
+   }
+
+   // NOT ALLOWED: AI flagged → post removed, create violation, send appeal notification + email
+   if post.Status == "removed" && orDecision == "flag" {
+       go func() {
+           ctx := context.Background()
+
+           // Send in-app notification
+           if h.notificationService != nil {
+               h.notificationService.NotifyContentRemoved(ctx, userID.String(), post.ID.String())
+           }
+
+           // Create moderation violation record
+           if h.moderationService != nil {
+               h.moderationService.FlagPost(ctx, post.ID, &services.ThreePoisonsScore{Hate: 1.0}, "not_allowed")
+           }
+
+           // Send appeal email — get email from users table, display name from profiles
+           var userEmail string
+           h.postRepo.Pool().QueryRow(ctx, `SELECT email FROM users WHERE id = $1`, userID).Scan(&userEmail)
+           profile, _ := h.userRepo.GetProfileByID(ctx, userID.String())
+           if userEmail != "" {
+               displayName := "there"
+               if profile != nil && profile.DisplayName != nil {
+                   displayName = *profile.DisplayName
+               }
+               snippet := req.Body
+               if len(snippet) > 100 {
+                   snippet = snippet[:100] + "..."
+               }
+               appealBody := fmt.Sprintf(
+                   "Hi %s,\n\n"+
+                       "Your recent post on Sojorn was removed because it was found to violate our community guidelines.\n\n"+
+                       "Post content: \"%s\"\n\n"+
+                       "If you believe this was a mistake, you can appeal this decision in your Sojorn app:\n"+
+                       "Go to Profile → Settings → Appeals\n\n"+
+                       "Our moderation team will review your appeal within 48 hours.\n\n"+
+                       "— The Sojorn Team",
+                   displayName, snippet,
+               )
+               log.Info().Str("email", userEmail).Msg("Sending content removal appeal email")
+               h.postRepo.Pool().Exec(ctx,
+                   `INSERT INTO email_queue (to_email, subject, body, created_at) VALUES ($1, $2, $3, NOW()) ON CONFLICT DO NOTHING`,
+                   userEmail, "Your Sojorn post was removed", appealBody,
+               )
+           }
+
+           log.Warn().Str("post_id", post.ID.String()).Str("author_id", userID.String()).Msg("Post removed by AI moderation — not allowed content")
+       }()
+   }
+
    // Log AI moderation decision to audit log
    if h.moderationService != nil {
        decision := "pass"
@@ -391,7 +459,9 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
        if post.ToneLabel != nil && *post.ToneLabel != "" {
            flagReason = *post.ToneLabel
        }
-       if post.Status == "pending_moderation" {
+       if post.Status == "removed" {
+           decision = "flag"
+       } else if post.Status == "pending_moderation" {
            decision = "flag"
        } else if post.IsNSFW {
            decision = "nsfw"
@@ -403,7 +473,7 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
        } else {
            scores = &services.ThreePoisonsScore{}
        }
-       h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, "", nil)
+       h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, orDecision, nil)
    }
 
    // Check for @mentions and notify mentioned users
diff --git a/go-backend/internal/models/notification.go b/go-backend/internal/models/notification.go
index e0df6de..e1eebd6 100644
--- a/go-backend/internal/models/notification.go
+++ b/go-backend/internal/models/notification.go
@@ -9,19 +9,21 @@ import (
 
 // NotificationType constants for type safety
 const (
-   NotificationTypeLike          = "like"
-   NotificationTypeComment       = "comment"
-   NotificationTypeReply         = "reply"
-   NotificationTypeMention       = "mention"
-   NotificationTypeFollow        = "follow"
-   NotificationTypeFollowRequest = "follow_request"
-   NotificationTypeFollowAccept  = "follow_accepted"
-   NotificationTypeMessage       = "message"
-   NotificationTypeSave          = "save"
-   NotificationTypeBeaconVouch   = "beacon_vouch"
-   NotificationTypeBeaconReport  = "beacon_report"
-   NotificationTypeShare         = "share"
-   NotificationTypeQuipReaction  = "quip_reaction"
+   NotificationTypeLike           = "like"
+   NotificationTypeComment        = "comment"
+   NotificationTypeReply          = "reply"
+   NotificationTypeMention        = "mention"
+   NotificationTypeFollow         = "follow"
+   NotificationTypeFollowRequest  = "follow_request"
+   NotificationTypeFollowAccept   = "follow_accepted"
+   NotificationTypeMessage        = "message"
+   NotificationTypeSave           = "save"
+   NotificationTypeBeaconVouch    = "beacon_vouch"
+   NotificationTypeBeaconReport   = "beacon_report"
+   NotificationTypeShare          = "share"
+   NotificationTypeQuipReaction   = "quip_reaction"
+   NotificationTypeNSFWWarning    = "nsfw_warning"
+   NotificationTypeContentRemoved = "content_removed"
 )
 
 // NotificationPriority constants
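For review context, here is a minimal, self-contained Go sketch restating the decision mapping the CreatePost hunks above implement, assuming a post that image moderation has not already placed in pending_moderation. The names moderationOutcome and decide are illustrative only and are not part of this patch or the codebase.

package main

import "fmt"

// moderationOutcome is an illustrative summary of the state CreatePost ends up in
// after the OpenRouter text-moderation decision; it is not a type from this patch.
type moderationOutcome struct {
    Status      string // post.Status
    IsNSFW      bool   // post.IsNSFW
    NSFWWarning bool   // NotifyNSFWWarning fires (AI said nsfw, author did not self-label)
    Removed     bool   // NotifyContentRemoved, violation record, and appeal email fire
}

// decide mirrors the switch on orResult.Action in the handler hunk above.
func decide(orAction string, selfLabeledNSFW bool) moderationOutcome {
    out := moderationOutcome{Status: "active", IsNSFW: selfLabeledNSFW}
    switch orAction {
    case "nsfw":
        out.IsNSFW = true
        out.NSFWWarning = !selfLabeledNSFW // auto-labeled, so the author gets a warning
    case "flag":
        out.Status = "removed" // NOT ALLOWED content is removed after creation
        out.Removed = true
    }
    return out
}

func main() {
    cases := []struct {
        action    string
        selfLabel bool
    }{
        {"clean", false},
        {"nsfw", false},
        {"nsfw", true},
        {"flag", false},
    }
    for _, c := range cases {
        fmt.Printf("action=%-5s selfLabel=%-5t → %+v\n", c.action, c.selfLabel, decide(c.action, c.selfLabel))
    }
}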
diff --git a/go-backend/internal/models/settings.go b/go-backend/internal/models/settings.go
index 921f473..86a52b9 100644
--- a/go-backend/internal/models/settings.go
+++ b/go-backend/internal/models/settings.go
@@ -31,5 +31,6 @@ type UserSettings struct {
    DataSaverMode   *bool     `json:"data_saver_mode" db:"data_saver_mode"`
    DefaultPostTtl  *int      `json:"default_post_ttl" db:"default_post_ttl"`
    NSFWEnabled     *bool     `json:"nsfw_enabled" db:"nsfw_enabled"`
+   NSFWBlurEnabled *bool     `json:"nsfw_blur_enabled" db:"nsfw_blur_enabled"`
    UpdatedAt       time.Time `json:"updated_at" db:"updated_at"`
 }
diff --git a/go-backend/internal/repository/post_repository.go b/go-backend/internal/repository/post_repository.go
index 6d7078b..40450e9 100644
--- a/go-backend/internal/repository/post_repository.go
+++ b/go-backend/internal/repository/post_repository.go
@@ -19,6 +19,10 @@ func NewPostRepository(pool *pgxpool.Pool) *PostRepository {
    return &PostRepository{pool: pool}
 }
 
+func (r *PostRepository) Pool() *pgxpool.Pool {
+   return r.pool
+}
+
 func (r *PostRepository) CreatePost(ctx context.Context, post *models.Post) error {
    // Calculate confidence score if it's a beacon
    if post.IsBeacon {
diff --git a/go-backend/internal/repository/user_repository.go b/go-backend/internal/repository/user_repository.go
index 81b2372..ee0a784 100644
--- a/go-backend/internal/repository/user_repository.go
+++ b/go-backend/internal/repository/user_repository.go
@@ -729,7 +729,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
    query := `
        SELECT user_id, theme, language, notifications_enabled, email_notifications,
            push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
-           default_post_ttl, COALESCE(nsfw_enabled, FALSE), updated_at
+           default_post_ttl, COALESCE(nsfw_enabled, FALSE), COALESCE(nsfw_blur_enabled, TRUE), updated_at
        FROM public.user_settings
        WHERE user_id = $1::uuid
    `
@@ -737,7 +737,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
    err := r.pool.QueryRow(ctx, query, userID).Scan(
        &us.UserID, &us.Theme, &us.Language, &us.NotificationsEnabled, &us.EmailNotifications,
        &us.PushNotifications, &us.ContentFilterLevel, &us.AutoPlayVideos, &us.DataSaverMode,
-       &us.DefaultPostTtl, &us.NSFWEnabled, &us.UpdatedAt,
+       &us.DefaultPostTtl, &us.NSFWEnabled, &us.NSFWBlurEnabled, &us.UpdatedAt,
    )
    if err != nil {
        if err.Error() == "no rows in result set" || err.Error() == "pgx: no rows in result set" {
@@ -759,6 +759,7 @@ func (r *UserRepository) GetUserSettings(ctx context.Context, userID string) (*m
            AutoPlayVideos:       &t,
            DataSaverMode:        &f,
            NSFWEnabled:          &f,
+           NSFWBlurEnabled:      &t,
            UpdatedAt:            time.Now(),
        }, nil
    }
@@ -772,8 +773,8 @@ func (r *UserRepository) UpdateUserSettings(ctx context.Context, us *models.User
        INSERT INTO public.user_settings (
            user_id, theme, language, notifications_enabled, email_notifications,
            push_notifications, content_filter_level, auto_play_videos, data_saver_mode,
-           default_post_ttl, nsfw_enabled, updated_at
-       ) VALUES ($1::uuid, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NOW())
+           default_post_ttl, nsfw_enabled, nsfw_blur_enabled, updated_at
+       ) VALUES ($1::uuid, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())
        ON CONFLICT (user_id) DO UPDATE SET
            theme = COALESCE(EXCLUDED.theme, user_settings.theme),
            language = COALESCE(EXCLUDED.language, user_settings.language),
@@ -785,12 +786,13 @@ func (r *UserRepository) UpdateUserSettings(ctx context.Context, us *models.User
            data_saver_mode = COALESCE(EXCLUDED.data_saver_mode, user_settings.data_saver_mode),
            default_post_ttl = COALESCE(EXCLUDED.default_post_ttl, user_settings.default_post_ttl),
            nsfw_enabled = COALESCE(EXCLUDED.nsfw_enabled, user_settings.nsfw_enabled),
+           nsfw_blur_enabled = COALESCE(EXCLUDED.nsfw_blur_enabled, user_settings.nsfw_blur_enabled),
            updated_at = NOW()
    `
    _, err := r.pool.Exec(ctx, query,
        us.UserID, us.Theme, us.Language, us.NotificationsEnabled, us.EmailNotifications,
        us.PushNotifications, us.ContentFilterLevel, us.AutoPlayVideos, us.DataSaverMode,
-       us.DefaultPostTtl, us.NSFWEnabled,
+       us.DefaultPostTtl, us.NSFWEnabled, us.NSFWBlurEnabled,
    )
    return err
 }
diff --git a/go-backend/internal/services/notification_service.go b/go-backend/internal/services/notification_service.go
index 038c279..a3cd1aa 100644
--- a/go-backend/internal/services/notification_service.go
+++ b/go-backend/internal/services/notification_service.go
@@ -206,6 +206,32 @@ func (s *NotificationService) NotifyBeaconReport(ctx context.Context, beaconAuth
    })
 }
 
+// NotifyNSFWWarning sends a warning when a post is auto-labeled as NSFW
+func (s *NotificationService) NotifyNSFWWarning(ctx context.Context, authorID string, postID string) error {
+   authorUUID := uuid.MustParse(authorID)
+   return s.sendNotification(ctx, models.PushNotificationRequest{
+       UserID:   authorUUID,
+       Type:     models.NotificationTypeNSFWWarning,
+       ActorID:  authorUUID, // system-generated, actor is self
+       PostID:   uuidPtr(postID),
+       PostType: "standard",
+       Priority: models.PriorityHigh,
+   })
+}
+
+// NotifyContentRemoved sends a notification when content is removed by AI moderation
+func (s *NotificationService) NotifyContentRemoved(ctx context.Context, authorID string, postID string) error {
+   authorUUID := uuid.MustParse(authorID)
+   return s.sendNotification(ctx, models.PushNotificationRequest{
+       UserID:   authorUUID,
+       Type:     models.NotificationTypeContentRemoved,
+       ActorID:  authorUUID, // system-generated
+       PostID:   uuidPtr(postID),
+       PostType: "standard",
+       Priority: models.PriorityUrgent,
+   })
+}
+
 // ============================================================================
 // Core Send Logic
 // ============================================================================
@@ -402,6 +428,16 @@ func (s *NotificationService) buildPushPayload(req models.PushNotificationReques
            body = fmt.Sprintf("%s reacted to your quip", actorName)
        }
 
+   case models.NotificationTypeNSFWWarning:
+       title = "Content Labeled as Sensitive"
+       body = "Your post was automatically labeled as NSFW. Please label sensitive content when posting to avoid further action."
+       data["target"] = "main_feed"
+
+   case models.NotificationTypeContentRemoved:
+       title = "Content Removed"
+       body = "Your post was removed for violating community guidelines. You can appeal this decision in your profile settings."
+       data["target"] = "profile_settings"
+
    default:
        title = "Sojorn"
        body = "You have a new notification"
diff --git a/go-backend/internal/services/openrouter_service.go b/go-backend/internal/services/openrouter_service.go
index 0e69a70..e42c394 100644
--- a/go-backend/internal/services/openrouter_service.go
+++ b/go-backend/internal/services/openrouter_service.go
@@ -414,18 +414,66 @@ const defaultModerationSystemPrompt = `You are a content moderation AI for Sojor
 Analyze the provided content and decide one of three actions:
 
 1. "clean" — Content is appropriate for all users. No issues.
-2. "nsfw" — Content is NOT illegal or bannable, but is mature/sensitive. Examples: mild violence, suggestive (but not explicit) imagery, dark humor, intense themes, horror content, heated political speech, depictions of alcohol/smoking. This content will be blurred with a warning label so users who opted in can choose to view it.
-3. "flag" — Content violates platform policy and should be reviewed by moderators. Examples: explicit nudity/pornography, graphic gore, illegal activity, credible threats, child exploitation, hard drug use instructions, doxxing, extreme hate speech.
+2. "nsfw" — Content is mature/sensitive but ALLOWED on the platform. It will be blurred behind a warning label for users who have opted in. Think "Cinemax late night" — permissive but not extreme.
+3. "flag" — Content is NOT ALLOWED and will be removed. The user will receive an appeal notice.
 
-When unsure, prefer "nsfw" over "flag" — only flag content you believe is clearly illegal or extremely graphic.
+═══════════════════════════════════════════
+NUDITY & SEXUAL CONTENT RULES (Cinemax Rule)
+═══════════════════════════════════════════
+NSFW (allowed, blurred):
+- Partial or full nudity (breasts, buttocks, genitalia visible)
+- Suggestive or sensual poses, lingerie, implied sexual situations
+- Artistic nude photography, figure drawing, body-positive content
+- Breastfeeding, non-sexual nudity in natural contexts
+
+NOT ALLOWED (flag):
+- Explicit sexual intercourse (penetration, oral sex, any sex acts)
+- Hardcore pornography of any kind
+- Any sexual content involving minors (ZERO TOLERANCE — always flag)
+- Non-consensual sexual content, revenge porn
+- Bestiality
+
+═══════════════════════════════════════════
+VIOLENCE RULES (1-10 Scale)
+═══════════════════════════════════════════
+Rate the violence level on a 1-10 scale in your explanation:
+  1-3: Mild (arguments, shoving, cartoon violence) → "clean"
+  4-5: Moderate (blood from injuries, protest footage with blood, boxing/MMA, hunting) → "nsfw"
+  6-7: Graphic (open wounds, significant bloodshed, war footage) → "flag"
+  8-10: Extreme (torture, dismemberment, gore, execution) → "flag"
+
+Only violence rated 5 or below is allowed. 6+ is always flagged and removed.
+Protest footage showing blood or injuries = NSFW (4-5), NOT flagged.
+
+═══════════════════════════════════════════
+OTHER CONTENT RULES
+═══════════════════════════════════════════
+NSFW (allowed, blurred):
+- Dark humor, edgy memes, intense themes
+- Horror content, gore in fiction/movies (≤5 on violence scale)
+- Drug/alcohol references, smoking imagery
+- Heated political speech, strong profanity
+- Depictions of self-harm recovery (educational/supportive context)
+
+NOT ALLOWED (flag):
+- Credible threats of violence against real people
+- Doxxing (sharing private info to harass)
+- Illegal activity instructions (bomb-making, drug synthesis)
+- Extreme hate speech targeting protected groups
+- Spam/scam content designed to defraud users
+- Dangerous medical misinformation that could cause harm
+- Deepfakes designed to deceive or defame
+
+When unsure between clean and nsfw, prefer "nsfw" (better safe, user sees it blurred).
+When unsure between nsfw and flag, prefer "nsfw" — only flag content that clearly crosses the lines above.
 
 Respond ONLY with a JSON object in this exact format:
 {
   "action": "clean" or "nsfw" or "flag",
-  "nsfw_reason": "If action is nsfw, a short label users will see: e.g. 'Violence', 'Suggestive Content', '18+ Themes', 'Gore', 'Drug References'. Empty string if clean or flag.",
+  "nsfw_reason": "If action is nsfw, a short label: e.g. 'Nudity', 'Violence', 'Suggestive Content', '18+ Themes', 'Gore', 'Drug References'. Empty string if clean or flag.",
   "flagged": true/false,
   "reason": "one-line summary if flagged or nsfw, empty string if clean",
-  "explanation": "Detailed paragraph explaining your full analysis and why you chose this action.",
+  "explanation": "Detailed paragraph explaining your analysis. For violence, include your 1-10 rating. For nudity, explain what is shown and why it does or does not cross the intercourse line.",
   "hate": 0.0-1.0,
   "hate_detail": "What you found or didn't find related to hate/violence/sexual content.",
   "greed": 0.0-1.0,