Add layered content moderation: hard blocklist + strike system + client-side filter

2026-02-06 11:46:30 -06:00 · 2026-02-06 11:46:30 -06:00 · f6c4bb88e0
parent 35740f3fc6
commit f6c4bb88e0
6 changed files with 391 additions and 5 deletions
--- a/go-backend/cmd/api/main.go
+++ b/go-backend/cmd/api/main.go
@ -120,11 +120,14 @@ func main() {
 	// Initialize appeal service
 	appealService := services.NewAppealService(dbPool)

+	// Initialize content filter (hard blocklist + strike system)
+	contentFilter := services.NewContentFilter(dbPool)
+
 	hub := realtime.NewHub()
 	wsHandler := handlers.NewWSHandler(hub, cfg.JWTSecret)

 	userHandler := handlers.NewUserHandler(userRepo, postRepo, notificationService, assetService)
-	postHandler := handlers.NewPostHandler(postRepo, userRepo, feedService, assetService, notificationService, moderationService)
+	postHandler := handlers.NewPostHandler(postRepo, userRepo, feedService, assetService, notificationService, moderationService, contentFilter)
 	chatHandler := handlers.NewChatHandler(chatRepo, notificationService, hub)
 	authHandler := handlers.NewAuthHandler(userRepo, cfg, emailService)
 	categoryHandler := handlers.NewCategoryHandler(categoryRepo)
--- a/go-backend/internal/handlers/post_handler.go
+++ b/go-backend/internal/handlers/post_handler.go
@ -22,9 +22,10 @@ type PostHandler struct {
 	assetService        *services.AssetService
 	notificationService *services.NotificationService
 	moderationService   *services.ModerationService
+	contentFilter       *services.ContentFilter
 }

-func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.UserRepository, feedService *services.FeedService, assetService *services.AssetService, notificationService *services.NotificationService, moderationService *services.ModerationService) *PostHandler {
+func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.UserRepository, feedService *services.FeedService, assetService *services.AssetService, notificationService *services.NotificationService, moderationService *services.ModerationService, contentFilter *services.ContentFilter) *PostHandler {
 	return &PostHandler{
 		postRepo:            postRepo,
 		userRepo:            userRepo,
@ -32,6 +33,7 @@ func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.Us
 		assetService:        assetService,
 		notificationService: notificationService,
 		moderationService:   moderationService,
+		contentFilter:       contentFilter,
 	}
 }

@ -55,6 +57,23 @@ func (h *PostHandler) CreateComment(c *gin.Context) {
 		return
 	}

+	// Layer 0: Hard blocklist check — reject immediately, never save
+	if h.contentFilter != nil {
+		result := h.contentFilter.CheckContent(req.Body)
+		if result.Blocked {
+			// Record strike
+			strikeCount, consequence, _ := h.contentFilter.RecordStrike(c.Request.Context(), userID, result.Category, req.Body)
+			c.JSON(http.StatusUnprocessableEntity, gin.H{
+				"error":       result.Message,
+				"blocked":     true,
+				"category":    result.Category,
+				"strikes":     strikeCount,
+				"consequence": consequence,
+			})
+			return
+		}
+	}
+
 	tags := utils.ExtractHashtags(req.Body)
 	tone := "neutral"
 	cis := 0.8
@ -185,6 +204,22 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 		return
 	}

+	// Layer 0: Hard blocklist check — reject immediately, never save
+	if h.contentFilter != nil {
+		result := h.contentFilter.CheckContent(req.Body)
+		if result.Blocked {
+			strikeCount, consequence, _ := h.contentFilter.RecordStrike(c.Request.Context(), userID, result.Category, req.Body)
+			c.JSON(http.StatusUnprocessableEntity, gin.H{
+				"error":       result.Message,
+				"blocked":     true,
+				"category":    result.Category,
+				"strikes":     strikeCount,
+				"consequence": consequence,
+			})
+			return
+		}
+	}
+
 	// 1. Check rate limit (Simplification)
 	trustState, err := h.userRepo.GetTrustState(c.Request.Context(), userID.String())
 	if err == nil && trustState.PostsToday >= 50 { // Example hard limit
--- a/go-backend/internal/services/content_filter.go
+++ b/go-backend/internal/services/content_filter.go
@ -0,0 +1,210 @@
+package services
+
+import (
+	"context"
+	"fmt"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/jackc/pgx/v5/pgxpool"
+)
+
+// ContentFilter provides hard blocklist checking and strike tracking.
+// Layer 0: Instant rejection for obvious slurs — post never saves.
+//
+// CheckContent matches text against patterns; RecordStrike and
+// GetUserStrikes read and write the content_strikes table through pool.
+type ContentFilter struct {
+	pool     *pgxpool.Pool // database handle used for strike bookkeeping
+	patterns []*blockedPattern // compiled blocklist, filled by buildPatterns
+}
+
+// blockedPattern is one compiled blocklist entry.
+// CheckContent only acts on entries whose severity is "hard".
+type blockedPattern struct {
+	regex    *regexp.Regexp // matcher applied to the normalized text
+	category string // "slur", "threat", etc.
+	severity string // "hard" = instant block, "soft" = warning
+}
+
+// ContentCheckResult is returned by CheckContent.
+// Category and Message are only populated when Blocked is true.
+type ContentCheckResult struct {
+	Blocked  bool   `json:"blocked"` // true when a hard pattern matched
+	Category string `json:"category,omitempty"` // category of the matching pattern
+	Message  string `json:"message,omitempty"` // user-facing explanation
+}
+
+// NewContentFilter returns a ContentFilter backed by pool with its
+// blocklist patterns already compiled.
+func NewContentFilter(pool *pgxpool.Pool) *ContentFilter {
+	filter := new(ContentFilter)
+	filter.pool = pool
+	filter.buildPatterns()
+	return filter
+}
+
+// buildPatterns compiles regex patterns for slur and threat detection and
+// stores them in cf.patterns.
+// Uses word-boundary-aware patterns that catch common evasion tactics:
+//   - Spacing (n i g g e r)
+//   - Leetspeak (n1gg3r)
+//   - Repeated chars (niggger)
+//
+// NOTE(review): partial masking (n*gger, n**ga) is NOT caught. CheckContent
+// runs normalizeText first, which deletes "*" and "|", so a masked word
+// reaches these patterns with the letter simply missing and none of them
+// match. For the same reason the "|" alternative inside the character
+// classes below never sees that character.
+//
+// A pattern that fails to compile is logged and skipped; the remaining
+// patterns are still loaded.
+func (cf *ContentFilter) buildPatterns() {
+	type entry struct {
+		pattern  string
+		category string
+		severity string
+	}
+
+	// Hard-blocked slurs — these NEVER get posted.
+	// Patterns use (?i) for case-insensitive and flexible char matching.
+	entries := []entry{
+		// N-word and variants
+		{`(?i)\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b`, "slur", "hard"},
+		{`(?i)\bn[i1!|l][gq9]+[aA@]\b`, "slur", "hard"},
+		{`(?i)\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b`, "slur", "hard"},
+
+		// F-word (homophobic slur) and variants
+		{`(?i)\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b`, "slur", "hard"},
+		{`(?i)\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b`, "slur", "hard"},
+
+		// K-word (anti-Jewish slur)
+		{`(?i)\bk[i1][k]+[e3]?s?\b`, "slur", "hard"},
+
+		// C-word (racial slur against Asian people)
+		{`(?i)\bch[i1]n[k]+s?\b`, "slur", "hard"},
+
+		// S-word (anti-Hispanic slur)
+		{`(?i)\bsp[i1][ck]+s?\b`, "slur", "hard"},
+
+		// W-word (racial slur)
+		{`(?i)\bw[e3][t7]b[a@]ck+s?\b`, "slur", "hard"},
+
+		// R-word (ableist slur)
+		{`(?i)\br[e3]t[a@]rd+s?\b`, "slur", "hard"},
+
+		// T-word (transphobic slur)
+		{`(?i)\btr[a@4]nn[yie]+s?\b`, "slur", "hard"},
+
+		// Direct death/violence threats
+		{`(?i)\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b`, "threat", "hard"},
+		{`(?i)\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b`, "threat", "hard"},
+	}
+
+	cf.patterns = make([]*blockedPattern, 0, len(entries))
+	for _, e := range entries {
+		re, err := regexp.Compile(e.pattern)
+		if err != nil {
+			// Best effort: a bad pattern is reported and dropped so the
+			// rest of the blocklist still loads.
+			fmt.Printf("Content filter: failed to compile pattern %q: %v\n", e.pattern, err)
+			continue
+		}
+		cf.patterns = append(cf.patterns, &blockedPattern{
+			regex:    re,
+			category: e.category,
+			severity: e.severity,
+		})
+	}
+
+	// Reports how many patterns compiled, which may be fewer than len(entries).
+	fmt.Printf("Content filter: loaded %d patterns\n", len(cf.patterns))
+}
+
+// CheckContent scans text against the hard blocklist and reports the
+// category of the first pattern that matches. Empty text is never blocked.
+// The returned result is always non-nil.
+func (cf *ContentFilter) CheckContent(text string) *ContentCheckResult {
+	verdict := &ContentCheckResult{}
+	if text == "" {
+		return verdict
+	}
+
+	// Strip zero-width and separator characters before matching.
+	cleaned := normalizeText(text)
+
+	for _, entry := range cf.patterns {
+		if entry.severity != "hard" {
+			continue
+		}
+		if !entry.regex.MatchString(cleaned) {
+			continue
+		}
+		// First hit wins; the remaining patterns are not consulted.
+		verdict.Blocked = true
+		verdict.Category = entry.category
+		verdict.Message = "This content contains language that isn't allowed on Sojorn. Please revise your post."
+		return verdict
+	}
+
+	return verdict
+}
+
+// RecordStrike records a content violation strike against a user and
+// applies the consequence implied by the user's strike count over the last
+// 30 days (the count includes the strike just recorded).
+//
+// Strike escalation:
+//
+//	1-2 strikes: warning (post blocked, user informed)
+//	3-4 strikes: 24-hour posting suspension
+//	5-6 strikes: 7-day suspension
+//	7+ strikes:  permanent ban
+//
+// At most the first 100 bytes of content are stored with the strike.
+//
+// It returns the recent strike count and the consequence ("warning",
+// "suspend_24h", "suspend_7d" or "ban"). If the strike cannot be stored or
+// counted it returns 0, "" and the error. If the strike was stored but the
+// ban/suspension update fails, it returns the count and the intended
+// consequence together with a non-nil error, so callers can tell that the
+// consequence was NOT applied.
+func (cf *ContentFilter) RecordStrike(ctx context.Context, userID uuid.UUID, category, content string) (int, string, error) {
+	// Insert strike
+	_, err := cf.pool.Exec(ctx, `
+		INSERT INTO content_strikes (user_id, category, content_snippet, created_at)
+		VALUES ($1, $2, $3, NOW())
+	`, userID, category, truncate(content, 100))
+	if err != nil {
+		return 0, "", fmt.Errorf("failed to record strike: %w", err)
+	}
+
+	// Count recent strikes (last 30 days)
+	var count int
+	err = cf.pool.QueryRow(ctx, `
+		SELECT COUNT(*) FROM content_strikes
+		WHERE user_id = $1 AND created_at > NOW() - INTERVAL '30 days'
+	`, userID).Scan(&count)
+	if err != nil {
+		return 0, "", fmt.Errorf("failed to count strikes: %w", err)
+	}
+
+	const suspendSQL = `UPDATE users SET status = 'suspended', suspended_until = $2 WHERE id = $1`
+
+	// Determine and apply the consequence. Each update is checked so that a
+	// failed enforcement is surfaced instead of being logged as applied.
+	consequence := "warning"
+	switch {
+	case count >= 7:
+		consequence = "ban"
+		if _, err := cf.pool.Exec(ctx, `UPDATE users SET status = 'banned' WHERE id = $1`, userID); err != nil {
+			return count, consequence, fmt.Errorf("failed to ban user: %w", err)
+		}
+		fmt.Printf("Content filter: user %s BANNED (%d strikes)\n", userID, count)
+	case count >= 5:
+		consequence = "suspend_7d"
+		suspendUntil := time.Now().Add(7 * 24 * time.Hour)
+		if _, err := cf.pool.Exec(ctx, suspendSQL, userID, suspendUntil); err != nil {
+			return count, consequence, fmt.Errorf("failed to suspend user: %w", err)
+		}
+		fmt.Printf("Content filter: user %s suspended 7 days (%d strikes)\n", userID, count)
+	case count >= 3:
+		consequence = "suspend_24h"
+		suspendUntil := time.Now().Add(24 * time.Hour)
+		if _, err := cf.pool.Exec(ctx, suspendSQL, userID, suspendUntil); err != nil {
+			return count, consequence, fmt.Errorf("failed to suspend user: %w", err)
+		}
+		fmt.Printf("Content filter: user %s suspended 24h (%d strikes)\n", userID, count)
+	default:
+		fmt.Printf("Content filter: user %s warning (%d strikes)\n", userID, count)
+	}
+
+	return count, consequence, nil
+}
+
+// GetUserStrikes reports how many strikes userID has accumulated in the
+// last 30 days.
+func (cf *ContentFilter) GetUserStrikes(ctx context.Context, userID uuid.UUID) (int, error) {
+	const recentStrikesSQL = `
+		SELECT COUNT(*) FROM content_strikes
+		WHERE user_id = $1 AND created_at > NOW() - INTERVAL '30 days'
+	`
+	var strikes int
+	row := cf.pool.QueryRow(ctx, recentStrikesSQL, userID)
+	err := row.Scan(&strikes)
+	return strikes, err
+}
+
+// normalizeText deletes characters commonly inserted to evade the filter:
+// zero-width code points (U+200B, U+200C, U+200D, U+FEFF) and the
+// separators ". - _ * |". Whitespace and letter case are left untouched.
+func normalizeText(text string) string {
+	// Removal order matches the blocklist's expectations: invisible
+	// characters first, then visible separators.
+	stripped := []string{
+		"\u200b", // zero-width space
+		"\u200c", // zero-width non-joiner
+		"\u200d", // zero-width joiner
+		"\ufeff", // BOM
+		".", "-", "_", "*", "|",
+	}
+
+	cleaned := text
+	for _, junk := range stripped {
+		cleaned = strings.ReplaceAll(cleaned, junk, "")
+	}
+	return cleaned
+}
+
+// truncate returns s shortened to at most maxLen bytes without splitting a
+// multi-byte UTF-8 character, so the result of truncating valid UTF-8 is
+// itself valid UTF-8 (it may be a few bytes shorter than maxLen). Strings
+// that already fit are returned unchanged; a non-positive maxLen yields "".
+func truncate(s string, maxLen int) string {
+	if len(s) <= maxLen {
+		return s
+	}
+	// Ranging over a string yields the byte offset of each rune start.
+	// Keep the last such offset that does not exceed maxLen.
+	cut := 0
+	for i := range s {
+		if i > maxLen {
+			break
+		}
+		cut = i
+	}
+	return s[:cut]
+}
--- a/go-backend/scripts/create_content_strikes.sql
+++ b/go-backend/scripts/create_content_strikes.sql
@ -0,0 +1,18 @@
+-- Strike log written by ContentFilter.RecordStrike: one row per recorded strike.
+CREATE TABLE IF NOT EXISTS content_strikes (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+    category TEXT NOT NULL,
+    content_snippet TEXT,
+    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_content_strikes_user_id ON content_strikes(user_id);
+CREATE INDEX IF NOT EXISTS idx_content_strikes_created_at ON content_strikes(created_at);
+
+-- Add suspended_until column to users if not exists.
+-- ADD COLUMN IF NOT EXISTS checks the table this statement actually resolves
+-- to. A lookup in information_schema.columns by bare table name also matches
+-- a "users" table in any other schema and would then skip adding the column.
+ALTER TABLE users ADD COLUMN IF NOT EXISTS suspended_until TIMESTAMP WITH TIME ZONE;
--- a/sojorn_app/lib/screens/compose/compose_screen.dart
+++ b/sojorn_app/lib/screens/compose/compose_screen.dart
@ -15,6 +15,7 @@ import '../../providers/feed_refresh_provider.dart';
 import '../../services/image_upload_service.dart';
 import '../../theme/app_theme.dart';
 import '../../widgets/composer/composer_toolbar.dart';
+import '../../services/content_filter.dart';
 import '../../widgets/sojorn_snackbar.dart';
 import 'image_editor_screen.dart';
 import '../quips/create/quip_studio_screen.dart'; // Added import
@ -322,6 +323,13 @@ class _ComposeScreenState extends ConsumerState<ComposeScreen> {
      return;
    }

+    // Layer 0: Client-side hard blocklist — never even send to server
+    final blockMessage = ContentFilter.instance.check(_bodyController.text.trim());
+    if (blockMessage != null) {
+      await _showBlockedDialog(blockMessage);
+      return;
+    }
+
    setState(() {
      _isLoading = true;
      _errorMessage = null;
@ -400,9 +408,15 @@ class _ComposeScreenState extends ConsumerState<ComposeScreen> {
            'Content verification temporarily unavailable. Please try again.';
      });
    } catch (e) {
-      setState(() {
-        _errorMessage = e.toString().replaceAll('Exception: ', '');
-      });
+      final msg = e.toString().replaceAll('Exception: ', '');
+      // Server-side blocklist catch (422 with blocked content message)
+      if (msg.contains("isn't allowed on Sojorn") || msg.contains('not allowed')) {
+        if (mounted) await _showBlockedDialog(msg);
+      } else {
+        setState(() {
+          _errorMessage = msg;
+        });
+      }
    } finally {
      if (mounted) {
        setState(() {
@ -440,6 +454,38 @@ class _ComposeScreenState extends ConsumerState<ComposeScreen> {
    return result ?? false;
  }

+  /// Shows a modal "Not Allowed" dialog containing [message].
+  ///
+  /// The returned future completes once the dialog is closed via the
+  /// "Edit My Post" button; taps outside the dialog do not dismiss it.
+  Future<void> _showBlockedDialog(String message) async {
+    final heading = Row(
+      children: [
+        Icon(Icons.block, color: AppTheme.error, size: 24),
+        const SizedBox(width: 8),
+        const Text('Not Allowed'),
+      ],
+    );
+    final dismissStyle = ElevatedButton.styleFrom(
+      backgroundColor: AppTheme.brightNavy,
+      foregroundColor: AppTheme.white,
+      shape: const StadiumBorder(),
+    );
+
+    await showDialog<void>(
+      context: context,
+      barrierDismissible: false,
+      builder: (dialogContext) {
+        return AlertDialog(
+          shape: RoundedRectangleBorder(borderRadius: BorderRadius.circular(16)),
+          title: heading,
+          content: Text(
+            message,
+            style: AppTheme.textTheme.bodyMedium,
+          ),
+          actions: [
+            ElevatedButton(
+              // Pop using the dialog's own context so only the dialog closes.
+              onPressed: () => Navigator.pop(dialogContext),
+              style: dismissStyle,
+              child: const Text('Edit My Post'),
+            ),
+          ],
+        );
+      },
+    );
+  }
+
  bool get _canPublish {
    return _bodyController.text.trim().isNotEmpty &&
        _bodyController.text.trim().length <= _maxCharacters &&
--- a/sojorn_app/lib/services/content_filter.dart
+++ b/sojorn_app/lib/services/content_filter.dart
@ -0,0 +1,74 @@
+/// Client-side content filter for Sojorn.
+/// Layer 0: Catches obvious slurs BEFORE sending to server.
+/// This prevents the post from ever leaving the device.
+///
+/// Used as a singleton through [ContentFilter.instance]; the constructor is
+/// private.
+class ContentFilter {
+  ContentFilter._();
+  static final instance = ContentFilter._();
+
+  /// Check text for hard-blocked content.
+  /// Returns null if clean, or a user-friendly message if blocked.
+  ///
+  /// The text is normalized with [_normalize] first, and the first matching
+  /// pattern ends the scan.
+  String? check(String text) {
+    if (text.isEmpty) return null;
+
+    final normalized = _normalize(text);
+
+    for (final pattern in _hardBlockPatterns) {
+      if (pattern.hasMatch(normalized)) {
+        return "We don't allow that kind of language on Sojorn. Please revise your post.";
+      }
+    }
+
+    return null;
+  }
+
+  /// Normalize text to catch common evasion tactics.
+  ///
+  /// Lowercases the input, then deletes zero-width characters and the
+  /// separators `. - _ * |`. Whitespace is left as-is.
+  String _normalize(String text) {
+    var result = text.toLowerCase();
+
+    // Remove zero-width characters
+    result = result.replaceAll('\u200b', '');
+    result = result.replaceAll('\u200c', '');
+    result = result.replaceAll('\u200d', '');
+    result = result.replaceAll('\ufeff', '');
+
+    // Remove common separator characters used to evade filters
+    result = result.replaceAll(RegExp(r'[.\-_*|]'), '');
+
+    return result;
+  }
+
+  // Hard-blocked patterns — these match slurs and direct threats.
+  // Mirrors the server-side patterns in content_filter.go.
+  // NOTE(review): _normalize has already removed "|" and "*" by the time
+  // these run, so the "|" alternative in the character classes never sees
+  // that character.
+  static final List<RegExp> _hardBlockPatterns = [
+    // N-word and variants
+    RegExp(r'\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b', caseSensitive: false),
+    RegExp(r'\bn[i1!|l][gq9]+[aA@]\b', caseSensitive: false),
+    RegExp(r'\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b', caseSensitive: false),
+
+    // F-word (homophobic slur) and variants
+    RegExp(r'\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b', caseSensitive: false),
+    RegExp(r'\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b', caseSensitive: false),
+
+    // K-word (anti-Jewish slur)
+    RegExp(r'\bk[i1][k]+[e3]?s?\b', caseSensitive: false),
+
+    // C-word (racial slur against Asian people)
+    RegExp(r'\bch[i1]n[k]+s?\b', caseSensitive: false),
+
+    // S-word (anti-Hispanic slur)
+    RegExp(r'\bsp[i1][ck]+s?\b', caseSensitive: false),
+
+    // W-word (racial slur)
+    RegExp(r'\bw[e3][t7]b[a@]ck+s?\b', caseSensitive: false),
+
+    // R-word (ableist slur)
+    RegExp(r'\br[e3]t[a@]rd+s?\b', caseSensitive: false),
+
+    // T-word (transphobic slur)
+    RegExp(r'\btr[a@4]nn[yie]+s?\b', caseSensitive: false),
+
+    // Direct death/violence threats
+    RegExp(r"\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b", caseSensitive: false),
+    RegExp(r'\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b', caseSensitive: false),
+  ];
+}