Add layered content moderation: hard blocklist + strike system + client-side filter

This commit is contained in:
Patrick Britton 2026-02-06 11:46:30 -06:00
parent 35740f3fc6
commit f6c4bb88e0
6 changed files with 391 additions and 5 deletions

View file

@ -120,11 +120,14 @@ func main() {
// Initialize appeal service // Initialize appeal service
appealService := services.NewAppealService(dbPool) appealService := services.NewAppealService(dbPool)
// Initialize content filter (hard blocklist + strike system)
contentFilter := services.NewContentFilter(dbPool)
hub := realtime.NewHub() hub := realtime.NewHub()
wsHandler := handlers.NewWSHandler(hub, cfg.JWTSecret) wsHandler := handlers.NewWSHandler(hub, cfg.JWTSecret)
userHandler := handlers.NewUserHandler(userRepo, postRepo, notificationService, assetService) userHandler := handlers.NewUserHandler(userRepo, postRepo, notificationService, assetService)
postHandler := handlers.NewPostHandler(postRepo, userRepo, feedService, assetService, notificationService, moderationService) postHandler := handlers.NewPostHandler(postRepo, userRepo, feedService, assetService, notificationService, moderationService, contentFilter)
chatHandler := handlers.NewChatHandler(chatRepo, notificationService, hub) chatHandler := handlers.NewChatHandler(chatRepo, notificationService, hub)
authHandler := handlers.NewAuthHandler(userRepo, cfg, emailService) authHandler := handlers.NewAuthHandler(userRepo, cfg, emailService)
categoryHandler := handlers.NewCategoryHandler(categoryRepo) categoryHandler := handlers.NewCategoryHandler(categoryRepo)

View file

@ -22,9 +22,10 @@ type PostHandler struct {
assetService *services.AssetService assetService *services.AssetService
notificationService *services.NotificationService notificationService *services.NotificationService
moderationService *services.ModerationService moderationService *services.ModerationService
contentFilter *services.ContentFilter
} }
func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.UserRepository, feedService *services.FeedService, assetService *services.AssetService, notificationService *services.NotificationService, moderationService *services.ModerationService) *PostHandler { func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.UserRepository, feedService *services.FeedService, assetService *services.AssetService, notificationService *services.NotificationService, moderationService *services.ModerationService, contentFilter *services.ContentFilter) *PostHandler {
return &PostHandler{ return &PostHandler{
postRepo: postRepo, postRepo: postRepo,
userRepo: userRepo, userRepo: userRepo,
@ -32,6 +33,7 @@ func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.Us
assetService: assetService, assetService: assetService,
notificationService: notificationService, notificationService: notificationService,
moderationService: moderationService, moderationService: moderationService,
contentFilter: contentFilter,
} }
} }
@ -55,6 +57,23 @@ func (h *PostHandler) CreateComment(c *gin.Context) {
return return
} }
// Layer 0: Hard blocklist check — reject immediately, never save
if h.contentFilter != nil {
result := h.contentFilter.CheckContent(req.Body)
if result.Blocked {
// Record strike
strikeCount, consequence, _ := h.contentFilter.RecordStrike(c.Request.Context(), userID, result.Category, req.Body)
c.JSON(http.StatusUnprocessableEntity, gin.H{
"error": result.Message,
"blocked": true,
"category": result.Category,
"strikes": strikeCount,
"consequence": consequence,
})
return
}
}
tags := utils.ExtractHashtags(req.Body) tags := utils.ExtractHashtags(req.Body)
tone := "neutral" tone := "neutral"
cis := 0.8 cis := 0.8
@ -185,6 +204,22 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
return return
} }
// Layer 0: Hard blocklist check — reject immediately, never save
if h.contentFilter != nil {
result := h.contentFilter.CheckContent(req.Body)
if result.Blocked {
strikeCount, consequence, _ := h.contentFilter.RecordStrike(c.Request.Context(), userID, result.Category, req.Body)
c.JSON(http.StatusUnprocessableEntity, gin.H{
"error": result.Message,
"blocked": true,
"category": result.Category,
"strikes": strikeCount,
"consequence": consequence,
})
return
}
}
// 1. Check rate limit (Simplification) // 1. Check rate limit (Simplification)
trustState, err := h.userRepo.GetTrustState(c.Request.Context(), userID.String()) trustState, err := h.userRepo.GetTrustState(c.Request.Context(), userID.String())
if err == nil && trustState.PostsToday >= 50 { // Example hard limit if err == nil && trustState.PostsToday >= 50 { // Example hard limit

View file

@ -0,0 +1,210 @@
package services
import (
"context"
"fmt"
"regexp"
"strings"
"time"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool"
)
// ContentFilter provides hard blocklist checking and strike tracking.
// Layer 0: Instant rejection for obvious slurs — post never saves.
type ContentFilter struct {
pool *pgxpool.Pool
patterns []*blockedPattern
}
type blockedPattern struct {
regex *regexp.Regexp
category string // "slur", "threat", etc.
severity string // "hard" = instant block, "soft" = warning
}
// ContentCheckResult is returned by CheckContent.
type ContentCheckResult struct {
Blocked bool `json:"blocked"`
Category string `json:"category,omitempty"`
Message string `json:"message,omitempty"`
}
func NewContentFilter(pool *pgxpool.Pool) *ContentFilter {
cf := &ContentFilter{pool: pool}
cf.buildPatterns()
return cf
}
// buildPatterns compiles regex patterns for slur detection.
// Uses word-boundary-aware patterns that catch common evasion tactics:
// - Spacing (n i g g e r)
// - Leetspeak (n1gg3r)
// - Repeated chars (niggger)
// - Partial masking (n*gger, n**ga)
func (cf *ContentFilter) buildPatterns() {
type entry struct {
pattern string
category string
severity string
}
// Hard-blocked slurs — these NEVER get posted.
// Patterns use (?i) for case-insensitive and flexible char matching.
entries := []entry{
// N-word and variants
{`(?i)\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b`, "slur", "hard"},
{`(?i)\bn[i1!|l][gq9]+[aA@]\b`, "slur", "hard"},
{`(?i)\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b`, "slur", "hard"},
// F-word (homophobic slur) and variants
{`(?i)\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b`, "slur", "hard"},
{`(?i)\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b`, "slur", "hard"},
// K-word (anti-Jewish slur)
{`(?i)\bk[i1][k]+[e3]?s?\b`, "slur", "hard"},
// C-word (racial slur against Asian people)
{`(?i)\bch[i1]n[k]+s?\b`, "slur", "hard"},
// S-word (anti-Hispanic slur)
{`(?i)\bsp[i1][ck]+s?\b`, "slur", "hard"},
// W-word (racial slur)
{`(?i)\bw[e3][t7]b[a@]ck+s?\b`, "slur", "hard"},
// R-word (ableist slur)
{`(?i)\br[e3]t[a@]rd+s?\b`, "slur", "hard"},
// T-word (transphobic slur)
{`(?i)\btr[a@4]nn[yie]+s?\b`, "slur", "hard"},
// Direct death/violence threats
{`(?i)\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b`, "threat", "hard"},
{`(?i)\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b`, "threat", "hard"},
}
cf.patterns = make([]*blockedPattern, 0, len(entries))
for _, e := range entries {
re, err := regexp.Compile(e.pattern)
if err != nil {
fmt.Printf("Content filter: failed to compile pattern %q: %v\n", e.pattern, err)
continue
}
cf.patterns = append(cf.patterns, &blockedPattern{
regex: re,
category: e.category,
severity: e.severity,
})
}
fmt.Printf("Content filter: loaded %d patterns\n", len(cf.patterns))
}
// CheckContent scans text against the hard blocklist.
// Returns immediately on first match — no need to check all patterns.
func (cf *ContentFilter) CheckContent(text string) *ContentCheckResult {
if text == "" {
return &ContentCheckResult{Blocked: false}
}
// Normalize: collapse whitespace, strip zero-width chars
normalized := normalizeText(text)
for _, p := range cf.patterns {
if p.severity == "hard" && p.regex.MatchString(normalized) {
return &ContentCheckResult{
Blocked: true,
Category: p.category,
Message: "This content contains language that isn't allowed on Sojorn. Please revise your post.",
}
}
}
return &ContentCheckResult{Blocked: false}
}
// RecordStrike records a content violation strike against a user.
// Strike escalation:
//
// 1-2 strikes: warning (post blocked, user informed)
// 3 strikes: 24-hour posting suspension
// 5 strikes: 7-day suspension
// 7+ strikes: permanent ban
func (cf *ContentFilter) RecordStrike(ctx context.Context, userID uuid.UUID, category, content string) (int, string, error) {
// Insert strike
_, err := cf.pool.Exec(ctx, `
INSERT INTO content_strikes (user_id, category, content_snippet, created_at)
VALUES ($1, $2, $3, NOW())
`, userID, category, truncate(content, 100))
if err != nil {
return 0, "", fmt.Errorf("failed to record strike: %w", err)
}
// Count recent strikes (last 30 days)
var count int
err = cf.pool.QueryRow(ctx, `
SELECT COUNT(*) FROM content_strikes
WHERE user_id = $1 AND created_at > NOW() - INTERVAL '30 days'
`, userID).Scan(&count)
if err != nil {
return 0, "", fmt.Errorf("failed to count strikes: %w", err)
}
// Determine consequence
consequence := "warning"
switch {
case count >= 7:
consequence = "ban"
cf.pool.Exec(ctx, `UPDATE users SET status = 'banned' WHERE id = $1`, userID)
fmt.Printf("Content filter: user %s BANNED (%d strikes)\n", userID, count)
case count >= 5:
consequence = "suspend_7d"
suspendUntil := time.Now().Add(7 * 24 * time.Hour)
cf.pool.Exec(ctx, `UPDATE users SET status = 'suspended', suspended_until = $2 WHERE id = $1`, userID, suspendUntil)
fmt.Printf("Content filter: user %s suspended 7 days (%d strikes)\n", userID, count)
case count >= 3:
consequence = "suspend_24h"
suspendUntil := time.Now().Add(24 * time.Hour)
cf.pool.Exec(ctx, `UPDATE users SET status = 'suspended', suspended_until = $2 WHERE id = $1`, userID, suspendUntil)
fmt.Printf("Content filter: user %s suspended 24h (%d strikes)\n", userID, count)
default:
fmt.Printf("Content filter: user %s warning (%d strikes)\n", userID, count)
}
return count, consequence, nil
}
// GetUserStrikes returns the number of recent strikes for a user.
func (cf *ContentFilter) GetUserStrikes(ctx context.Context, userID uuid.UUID) (int, error) {
var count int
err := cf.pool.QueryRow(ctx, `
SELECT COUNT(*) FROM content_strikes
WHERE user_id = $1 AND created_at > NOW() - INTERVAL '30 days'
`, userID).Scan(&count)
return count, err
}
// normalizeText strips common evasion characters and collapses spacing.
func normalizeText(text string) string {
// Remove zero-width characters
text = strings.ReplaceAll(text, "\u200b", "") // zero-width space
text = strings.ReplaceAll(text, "\u200c", "") // zero-width non-joiner
text = strings.ReplaceAll(text, "\u200d", "") // zero-width joiner
text = strings.ReplaceAll(text, "\ufeff", "") // BOM
// Remove common separator characters used to evade filters
for _, ch := range []string{".", "-", "_", "*", "|"} {
text = strings.ReplaceAll(text, ch, "")
}
return text
}
func truncate(s string, maxLen int) string {
if len(s) <= maxLen {
return s
}
return s[:maxLen]
}

View file

@ -0,0 +1,18 @@
CREATE TABLE IF NOT EXISTS content_strikes (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
category TEXT NOT NULL,
content_snippet TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_content_strikes_user_id ON content_strikes(user_id);
CREATE INDEX IF NOT EXISTS idx_content_strikes_created_at ON content_strikes(created_at);
-- Add suspended_until column to users if not exists
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='users' AND column_name='suspended_until') THEN
ALTER TABLE users ADD COLUMN suspended_until TIMESTAMP WITH TIME ZONE;
END IF;
END $$;

View file

@ -15,6 +15,7 @@ import '../../providers/feed_refresh_provider.dart';
import '../../services/image_upload_service.dart'; import '../../services/image_upload_service.dart';
import '../../theme/app_theme.dart'; import '../../theme/app_theme.dart';
import '../../widgets/composer/composer_toolbar.dart'; import '../../widgets/composer/composer_toolbar.dart';
import '../../services/content_filter.dart';
import '../../widgets/sojorn_snackbar.dart'; import '../../widgets/sojorn_snackbar.dart';
import 'image_editor_screen.dart'; import 'image_editor_screen.dart';
import '../quips/create/quip_studio_screen.dart'; // Added import import '../quips/create/quip_studio_screen.dart'; // Added import
@ -322,6 +323,13 @@ class _ComposeScreenState extends ConsumerState<ComposeScreen> {
return; return;
} }
// Layer 0: Client-side hard blocklist never even send to server
final blockMessage = ContentFilter.instance.check(_bodyController.text.trim());
if (blockMessage != null) {
await _showBlockedDialog(blockMessage);
return;
}
setState(() { setState(() {
_isLoading = true; _isLoading = true;
_errorMessage = null; _errorMessage = null;
@ -400,9 +408,15 @@ class _ComposeScreenState extends ConsumerState<ComposeScreen> {
'Content verification temporarily unavailable. Please try again.'; 'Content verification temporarily unavailable. Please try again.';
}); });
} catch (e) { } catch (e) {
setState(() { final msg = e.toString().replaceAll('Exception: ', '');
_errorMessage = e.toString().replaceAll('Exception: ', ''); // Server-side blocklist catch (422 with blocked content message)
}); if (msg.contains("isn't allowed on Sojorn") || msg.contains('not allowed')) {
if (mounted) await _showBlockedDialog(msg);
} else {
setState(() {
_errorMessage = msg;
});
}
} finally { } finally {
if (mounted) { if (mounted) {
setState(() { setState(() {
@ -440,6 +454,38 @@ class _ComposeScreenState extends ConsumerState<ComposeScreen> {
return result ?? false; return result ?? false;
} }
Future<void> _showBlockedDialog(String message) async {
await showDialog<void>(
context: context,
barrierDismissible: false,
builder: (context) => AlertDialog(
shape: RoundedRectangleBorder(borderRadius: BorderRadius.circular(16)),
title: Row(
children: [
Icon(Icons.block, color: AppTheme.error, size: 24),
const SizedBox(width: 8),
const Text('Not Allowed'),
],
),
content: Text(
message,
style: AppTheme.textTheme.bodyMedium,
),
actions: [
ElevatedButton(
onPressed: () => Navigator.pop(context),
style: ElevatedButton.styleFrom(
backgroundColor: AppTheme.brightNavy,
foregroundColor: AppTheme.white,
shape: const StadiumBorder(),
),
child: const Text('Edit My Post'),
),
],
),
);
}
bool get _canPublish { bool get _canPublish {
return _bodyController.text.trim().isNotEmpty && return _bodyController.text.trim().isNotEmpty &&
_bodyController.text.trim().length <= _maxCharacters && _bodyController.text.trim().length <= _maxCharacters &&

View file

@ -0,0 +1,74 @@
/// Client-side content filter for Sojorn.
/// Layer 0: Catches obvious slurs BEFORE sending to server.
/// This prevents the post from ever leaving the device.
class ContentFilter {
ContentFilter._();
static final instance = ContentFilter._();
/// Check text for hard-blocked content.
/// Returns null if clean, or a user-friendly message if blocked.
String? check(String text) {
if (text.isEmpty) return null;
final normalized = _normalize(text);
for (final pattern in _hardBlockPatterns) {
if (pattern.hasMatch(normalized)) {
return "We don't allow that kind of language on Sojorn. Please revise your post.";
}
}
return null;
}
/// Normalize text to catch common evasion tactics.
String _normalize(String text) {
var result = text.toLowerCase();
// Remove zero-width characters
result = result.replaceAll('\u200b', '');
result = result.replaceAll('\u200c', '');
result = result.replaceAll('\u200d', '');
result = result.replaceAll('\ufeff', '');
// Remove common separator characters used to evade filters
result = result.replaceAll(RegExp(r'[.\-_*|]'), '');
return result;
}
// Hard-blocked patterns these match slurs and direct threats.
// Mirrors the server-side patterns in content_filter.go.
static final List<RegExp> _hardBlockPatterns = [
// N-word and variants
RegExp(r'\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b', caseSensitive: false),
RegExp(r'\bn[i1!|l][gq9]+[aA@]\b', caseSensitive: false),
RegExp(r'\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b', caseSensitive: false),
// F-word (homophobic slur) and variants
RegExp(r'\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b', caseSensitive: false),
RegExp(r'\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b', caseSensitive: false),
// K-word (anti-Jewish slur)
RegExp(r'\bk[i1][k]+[e3]?s?\b', caseSensitive: false),
// C-word (racial slur against Asian people)
RegExp(r'\bch[i1]n[k]+s?\b', caseSensitive: false),
// S-word (anti-Hispanic slur)
RegExp(r'\bsp[i1][ck]+s?\b', caseSensitive: false),
// W-word (racial slur)
RegExp(r'\bw[e3][t7]b[a@]ck+s?\b', caseSensitive: false),
// R-word (ableist slur)
RegExp(r'\br[e3]t[a@]rd+s?\b', caseSensitive: false),
// T-word (transphobic slur)
RegExp(r'\btr[a@4]nn[yie]+s?\b', caseSensitive: false),
// Direct death/violence threats
RegExp(r"\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b", caseSensitive: false),
RegExp(r'\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b', caseSensitive: false),
];
}