Add layered content moderation: hard blocklist + strike system + client-side filter
This commit is contained in:
parent
35740f3fc6
commit
f6c4bb88e0
|
|
@ -120,11 +120,14 @@ func main() {
|
|||
// Initialize appeal service
|
||||
appealService := services.NewAppealService(dbPool)
|
||||
|
||||
// Initialize content filter (hard blocklist + strike system)
|
||||
contentFilter := services.NewContentFilter(dbPool)
|
||||
|
||||
hub := realtime.NewHub()
|
||||
wsHandler := handlers.NewWSHandler(hub, cfg.JWTSecret)
|
||||
|
||||
userHandler := handlers.NewUserHandler(userRepo, postRepo, notificationService, assetService)
|
||||
postHandler := handlers.NewPostHandler(postRepo, userRepo, feedService, assetService, notificationService, moderationService)
|
||||
postHandler := handlers.NewPostHandler(postRepo, userRepo, feedService, assetService, notificationService, moderationService, contentFilter)
|
||||
chatHandler := handlers.NewChatHandler(chatRepo, notificationService, hub)
|
||||
authHandler := handlers.NewAuthHandler(userRepo, cfg, emailService)
|
||||
categoryHandler := handlers.NewCategoryHandler(categoryRepo)
|
||||
|
|
|
|||
|
|
@ -22,9 +22,10 @@ type PostHandler struct {
|
|||
assetService *services.AssetService
|
||||
notificationService *services.NotificationService
|
||||
moderationService *services.ModerationService
|
||||
contentFilter *services.ContentFilter
|
||||
}
|
||||
|
||||
func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.UserRepository, feedService *services.FeedService, assetService *services.AssetService, notificationService *services.NotificationService, moderationService *services.ModerationService) *PostHandler {
|
||||
func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.UserRepository, feedService *services.FeedService, assetService *services.AssetService, notificationService *services.NotificationService, moderationService *services.ModerationService, contentFilter *services.ContentFilter) *PostHandler {
|
||||
return &PostHandler{
|
||||
postRepo: postRepo,
|
||||
userRepo: userRepo,
|
||||
|
|
@ -32,6 +33,7 @@ func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.Us
|
|||
assetService: assetService,
|
||||
notificationService: notificationService,
|
||||
moderationService: moderationService,
|
||||
contentFilter: contentFilter,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -55,6 +57,23 @@ func (h *PostHandler) CreateComment(c *gin.Context) {
|
|||
return
|
||||
}
|
||||
|
||||
// Layer 0: Hard blocklist check — reject immediately, never save
|
||||
if h.contentFilter != nil {
|
||||
result := h.contentFilter.CheckContent(req.Body)
|
||||
if result.Blocked {
|
||||
// Record strike
|
||||
strikeCount, consequence, _ := h.contentFilter.RecordStrike(c.Request.Context(), userID, result.Category, req.Body)
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{
|
||||
"error": result.Message,
|
||||
"blocked": true,
|
||||
"category": result.Category,
|
||||
"strikes": strikeCount,
|
||||
"consequence": consequence,
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
tags := utils.ExtractHashtags(req.Body)
|
||||
tone := "neutral"
|
||||
cis := 0.8
|
||||
|
|
@ -185,6 +204,22 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
|
|||
return
|
||||
}
|
||||
|
||||
// Layer 0: Hard blocklist check — reject immediately, never save
|
||||
if h.contentFilter != nil {
|
||||
result := h.contentFilter.CheckContent(req.Body)
|
||||
if result.Blocked {
|
||||
strikeCount, consequence, _ := h.contentFilter.RecordStrike(c.Request.Context(), userID, result.Category, req.Body)
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{
|
||||
"error": result.Message,
|
||||
"blocked": true,
|
||||
"category": result.Category,
|
||||
"strikes": strikeCount,
|
||||
"consequence": consequence,
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 1. Check rate limit (Simplification)
|
||||
trustState, err := h.userRepo.GetTrustState(c.Request.Context(), userID.String())
|
||||
if err == nil && trustState.PostsToday >= 50 { // Example hard limit
|
||||
|
|
|
|||
210
go-backend/internal/services/content_filter.go
Normal file
210
go-backend/internal/services/content_filter.go
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
)
|
||||
|
||||
// ContentFilter provides hard blocklist checking and strike tracking.
|
||||
// Layer 0: Instant rejection for obvious slurs — post never saves.
|
||||
type ContentFilter struct {
|
||||
pool *pgxpool.Pool
|
||||
patterns []*blockedPattern
|
||||
}
|
||||
|
||||
type blockedPattern struct {
|
||||
regex *regexp.Regexp
|
||||
category string // "slur", "threat", etc.
|
||||
severity string // "hard" = instant block, "soft" = warning
|
||||
}
|
||||
|
||||
// ContentCheckResult is returned by CheckContent.
|
||||
type ContentCheckResult struct {
|
||||
Blocked bool `json:"blocked"`
|
||||
Category string `json:"category,omitempty"`
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
func NewContentFilter(pool *pgxpool.Pool) *ContentFilter {
|
||||
cf := &ContentFilter{pool: pool}
|
||||
cf.buildPatterns()
|
||||
return cf
|
||||
}
|
||||
|
||||
// buildPatterns compiles regex patterns for slur detection.
|
||||
// Uses word-boundary-aware patterns that catch common evasion tactics:
|
||||
// - Spacing (n i g g e r)
|
||||
// - Leetspeak (n1gg3r)
|
||||
// - Repeated chars (niggger)
|
||||
// - Partial masking (n*gger, n**ga)
|
||||
func (cf *ContentFilter) buildPatterns() {
|
||||
type entry struct {
|
||||
pattern string
|
||||
category string
|
||||
severity string
|
||||
}
|
||||
|
||||
// Hard-blocked slurs — these NEVER get posted.
|
||||
// Patterns use (?i) for case-insensitive and flexible char matching.
|
||||
entries := []entry{
|
||||
// N-word and variants
|
||||
{`(?i)\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b`, "slur", "hard"},
|
||||
{`(?i)\bn[i1!|l][gq9]+[aA@]\b`, "slur", "hard"},
|
||||
{`(?i)\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b`, "slur", "hard"},
|
||||
|
||||
// F-word (homophobic slur) and variants
|
||||
{`(?i)\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b`, "slur", "hard"},
|
||||
{`(?i)\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b`, "slur", "hard"},
|
||||
|
||||
// K-word (anti-Jewish slur)
|
||||
{`(?i)\bk[i1][k]+[e3]?s?\b`, "slur", "hard"},
|
||||
|
||||
// C-word (racial slur against Asian people)
|
||||
{`(?i)\bch[i1]n[k]+s?\b`, "slur", "hard"},
|
||||
|
||||
// S-word (anti-Hispanic slur)
|
||||
{`(?i)\bsp[i1][ck]+s?\b`, "slur", "hard"},
|
||||
|
||||
// W-word (racial slur)
|
||||
{`(?i)\bw[e3][t7]b[a@]ck+s?\b`, "slur", "hard"},
|
||||
|
||||
// R-word (ableist slur)
|
||||
{`(?i)\br[e3]t[a@]rd+s?\b`, "slur", "hard"},
|
||||
|
||||
// T-word (transphobic slur)
|
||||
{`(?i)\btr[a@4]nn[yie]+s?\b`, "slur", "hard"},
|
||||
|
||||
// Direct death/violence threats
|
||||
{`(?i)\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b`, "threat", "hard"},
|
||||
{`(?i)\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b`, "threat", "hard"},
|
||||
}
|
||||
|
||||
cf.patterns = make([]*blockedPattern, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
re, err := regexp.Compile(e.pattern)
|
||||
if err != nil {
|
||||
fmt.Printf("Content filter: failed to compile pattern %q: %v\n", e.pattern, err)
|
||||
continue
|
||||
}
|
||||
cf.patterns = append(cf.patterns, &blockedPattern{
|
||||
regex: re,
|
||||
category: e.category,
|
||||
severity: e.severity,
|
||||
})
|
||||
}
|
||||
|
||||
fmt.Printf("Content filter: loaded %d patterns\n", len(cf.patterns))
|
||||
}
|
||||
|
||||
// CheckContent scans text against the hard blocklist.
|
||||
// Returns immediately on first match — no need to check all patterns.
|
||||
func (cf *ContentFilter) CheckContent(text string) *ContentCheckResult {
|
||||
if text == "" {
|
||||
return &ContentCheckResult{Blocked: false}
|
||||
}
|
||||
|
||||
// Normalize: collapse whitespace, strip zero-width chars
|
||||
normalized := normalizeText(text)
|
||||
|
||||
for _, p := range cf.patterns {
|
||||
if p.severity == "hard" && p.regex.MatchString(normalized) {
|
||||
return &ContentCheckResult{
|
||||
Blocked: true,
|
||||
Category: p.category,
|
||||
Message: "This content contains language that isn't allowed on Sojorn. Please revise your post.",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &ContentCheckResult{Blocked: false}
|
||||
}
|
||||
|
||||
// RecordStrike records a content violation strike against a user.
|
||||
// Strike escalation:
|
||||
//
|
||||
// 1-2 strikes: warning (post blocked, user informed)
|
||||
// 3 strikes: 24-hour posting suspension
|
||||
// 5 strikes: 7-day suspension
|
||||
// 7+ strikes: permanent ban
|
||||
func (cf *ContentFilter) RecordStrike(ctx context.Context, userID uuid.UUID, category, content string) (int, string, error) {
|
||||
// Insert strike
|
||||
_, err := cf.pool.Exec(ctx, `
|
||||
INSERT INTO content_strikes (user_id, category, content_snippet, created_at)
|
||||
VALUES ($1, $2, $3, NOW())
|
||||
`, userID, category, truncate(content, 100))
|
||||
if err != nil {
|
||||
return 0, "", fmt.Errorf("failed to record strike: %w", err)
|
||||
}
|
||||
|
||||
// Count recent strikes (last 30 days)
|
||||
var count int
|
||||
err = cf.pool.QueryRow(ctx, `
|
||||
SELECT COUNT(*) FROM content_strikes
|
||||
WHERE user_id = $1 AND created_at > NOW() - INTERVAL '30 days'
|
||||
`, userID).Scan(&count)
|
||||
if err != nil {
|
||||
return 0, "", fmt.Errorf("failed to count strikes: %w", err)
|
||||
}
|
||||
|
||||
// Determine consequence
|
||||
consequence := "warning"
|
||||
switch {
|
||||
case count >= 7:
|
||||
consequence = "ban"
|
||||
cf.pool.Exec(ctx, `UPDATE users SET status = 'banned' WHERE id = $1`, userID)
|
||||
fmt.Printf("Content filter: user %s BANNED (%d strikes)\n", userID, count)
|
||||
case count >= 5:
|
||||
consequence = "suspend_7d"
|
||||
suspendUntil := time.Now().Add(7 * 24 * time.Hour)
|
||||
cf.pool.Exec(ctx, `UPDATE users SET status = 'suspended', suspended_until = $2 WHERE id = $1`, userID, suspendUntil)
|
||||
fmt.Printf("Content filter: user %s suspended 7 days (%d strikes)\n", userID, count)
|
||||
case count >= 3:
|
||||
consequence = "suspend_24h"
|
||||
suspendUntil := time.Now().Add(24 * time.Hour)
|
||||
cf.pool.Exec(ctx, `UPDATE users SET status = 'suspended', suspended_until = $2 WHERE id = $1`, userID, suspendUntil)
|
||||
fmt.Printf("Content filter: user %s suspended 24h (%d strikes)\n", userID, count)
|
||||
default:
|
||||
fmt.Printf("Content filter: user %s warning (%d strikes)\n", userID, count)
|
||||
}
|
||||
|
||||
return count, consequence, nil
|
||||
}
|
||||
|
||||
// GetUserStrikes returns the number of recent strikes for a user.
|
||||
func (cf *ContentFilter) GetUserStrikes(ctx context.Context, userID uuid.UUID) (int, error) {
|
||||
var count int
|
||||
err := cf.pool.QueryRow(ctx, `
|
||||
SELECT COUNT(*) FROM content_strikes
|
||||
WHERE user_id = $1 AND created_at > NOW() - INTERVAL '30 days'
|
||||
`, userID).Scan(&count)
|
||||
return count, err
|
||||
}
|
||||
|
||||
// evasionStripper deletes, in a single pass, characters that are commonly
// inserted into a word to slip past pattern matching.
var evasionStripper = strings.NewReplacer(
	// Invisible characters.
	"\u200b", "", // zero-width space
	"\u200c", "", // zero-width non-joiner
	"\u200d", "", // zero-width joiner
	"\ufeff", "", // BOM / zero-width no-break space
	"\u2060", "", // word joiner
	"\u00ad", "", // soft hyphen
	// Visible separators used to break a word apart.
	".", "",
	"-", "",
	"_", "",
	"*", "",
	"|", "",
)

// normalizeText removes invisible characters and common separator characters
// from text before it is matched against the blocklist.
//
// Whitespace and letter case are left untouched: the patterns themselves are
// case-insensitive and some of them allow whitespace between letters.
//
// NOTE(review): buildPatterns lists '|' as a stand-in for the letter "i" in
// some character classes, but '|' is removed here before matching, so that
// alternative can never fire — confirm which behavior is intended.
func normalizeText(text string) string {
	return evasionStripper.Replace(text)
}
|
||||
|
||||
// truncate returns s cut down to at most maxLen bytes.
//
// The cut never lands inside a multi-byte UTF-8 character: when byte maxLen
// falls in the middle of one, the whole character is dropped, so valid input
// always yields valid UTF-8. A maxLen of zero or less yields the empty string.
func truncate(s string, maxLen int) string {
	if maxLen <= 0 {
		return ""
	}
	if len(s) <= maxLen {
		return s
	}

	// Back up while the first dropped byte is a UTF-8 continuation byte
	// (10xxxxxx), i.e. while the cut would split a character.
	cut := maxLen
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut]
}
|
||||
18
go-backend/scripts/create_content_strikes.sql
Normal file
18
go-backend/scripts/create_content_strikes.sql
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
-- Strikes recorded by the content filter: one row per blocked submission.
CREATE TABLE IF NOT EXISTS content_strikes (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    category TEXT NOT NULL,
    content_snippet TEXT,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_content_strikes_user_id ON content_strikes(user_id);
CREATE INDEX IF NOT EXISTS idx_content_strikes_created_at ON content_strikes(created_at);

-- Add suspended_until column to users if not exists.
-- ADD COLUMN IF NOT EXISTS inspects exactly the table the ALTER resolves to,
-- unlike an information_schema lookup by table name alone, which also matches
-- a "users" table in any other schema and would then skip the column.
ALTER TABLE users ADD COLUMN IF NOT EXISTS suspended_until TIMESTAMP WITH TIME ZONE;
|
||||
|
|
@ -15,6 +15,7 @@ import '../../providers/feed_refresh_provider.dart';
|
|||
import '../../services/image_upload_service.dart';
|
||||
import '../../theme/app_theme.dart';
|
||||
import '../../widgets/composer/composer_toolbar.dart';
|
||||
import '../../services/content_filter.dart';
|
||||
import '../../widgets/sojorn_snackbar.dart';
|
||||
import 'image_editor_screen.dart';
|
||||
import '../quips/create/quip_studio_screen.dart'; // Added import
|
||||
|
|
@ -322,6 +323,13 @@ class _ComposeScreenState extends ConsumerState<ComposeScreen> {
|
|||
return;
|
||||
}
|
||||
|
||||
// Layer 0: Client-side hard blocklist — never even send to server
|
||||
final blockMessage = ContentFilter.instance.check(_bodyController.text.trim());
|
||||
if (blockMessage != null) {
|
||||
await _showBlockedDialog(blockMessage);
|
||||
return;
|
||||
}
|
||||
|
||||
setState(() {
|
||||
_isLoading = true;
|
||||
_errorMessage = null;
|
||||
|
|
@ -400,9 +408,15 @@ class _ComposeScreenState extends ConsumerState<ComposeScreen> {
|
|||
'Content verification temporarily unavailable. Please try again.';
|
||||
});
|
||||
} catch (e) {
|
||||
setState(() {
|
||||
_errorMessage = e.toString().replaceAll('Exception: ', '');
|
||||
});
|
||||
final msg = e.toString().replaceAll('Exception: ', '');
|
||||
// Server-side blocklist catch (422 with blocked content message)
|
||||
if (msg.contains("isn't allowed on Sojorn") || msg.contains('not allowed')) {
|
||||
if (mounted) await _showBlockedDialog(msg);
|
||||
} else {
|
||||
setState(() {
|
||||
_errorMessage = msg;
|
||||
});
|
||||
}
|
||||
} finally {
|
||||
if (mounted) {
|
||||
setState(() {
|
||||
|
|
@ -440,6 +454,38 @@ class _ComposeScreenState extends ConsumerState<ComposeScreen> {
|
|||
return result ?? false;
|
||||
}
|
||||
|
||||
/// Shows a modal "Not Allowed" dialog containing [message].
///
/// Tapping outside the dialog does not dismiss it. The returned future
/// completes once the dialog has been closed.
Future<void> _showBlockedDialog(String message) async {
  await showDialog<void>(
    context: context,
    barrierDismissible: false,
    builder: (dialogContext) {
      final heading = Row(
        children: [
          Icon(Icons.block, color: AppTheme.error, size: 24),
          const SizedBox(width: 8),
          const Text('Not Allowed'),
        ],
      );

      final editButton = ElevatedButton(
        onPressed: () => Navigator.pop(dialogContext),
        style: ElevatedButton.styleFrom(
          backgroundColor: AppTheme.brightNavy,
          foregroundColor: AppTheme.white,
          shape: const StadiumBorder(),
        ),
        child: const Text('Edit My Post'),
      );

      return AlertDialog(
        shape: RoundedRectangleBorder(borderRadius: BorderRadius.circular(16)),
        title: heading,
        content: Text(
          message,
          style: AppTheme.textTheme.bodyMedium,
        ),
        actions: [editButton],
      );
    },
  );
}
|
||||
|
||||
bool get _canPublish {
|
||||
return _bodyController.text.trim().isNotEmpty &&
|
||||
_bodyController.text.trim().length <= _maxCharacters &&
|
||||
|
|
|
|||
74
sojorn_app/lib/services/content_filter.dart
Normal file
74
sojorn_app/lib/services/content_filter.dart
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
/// Client-side content filter for Sojorn.
///
/// Layer 0 of moderation: obvious slurs and direct threats are caught on the
/// device BEFORE sending to the server, so the post never leaves the device.
class ContentFilter {
  ContentFilter._();

  /// The shared filter instance.
  static final instance = ContentFilter._();

  /// Message handed back to the caller when text is blocked.
  static const String _blockedMessage =
      "We don't allow that kind of language on Sojorn. Please revise your post.";

  /// Checks [text] against the hard blocklist.
  ///
  /// Returns `null` when the text is clean, or a user-friendly message when it
  /// is blocked.
  String? check(String text) {
    if (text.isEmpty) return null;

    final candidate = _normalize(text);
    final blocked = _hardBlockPatterns.any((re) => re.hasMatch(candidate));
    return blocked ? _blockedMessage : null;
  }

  /// Lower-cases [text] and strips characters commonly used to evade filters:
  /// zero-width characters and the separators `. - _ * |`.
  String _normalize(String text) {
    // Zero-width space, non-joiner, joiner and BOM.
    const invisibles = ['\u200b', '\u200c', '\u200d', '\ufeff'];

    var cleaned = text.toLowerCase();
    for (final ch in invisibles) {
      cleaned = cleaned.replaceAll(ch, '');
    }

    // Separator characters inserted between letters.
    return cleaned.replaceAll(RegExp(r'[.\-_*|]'), '');
  }

  // Sources of the hard-blocked patterns — slurs and direct threats.
  // Mirrors the server-side patterns in content_filter.go.
  static const List<String> _hardBlockSources = [
    // N-word and variants
    r'\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b',
    r'\bn[i1!|l][gq9]+[aA@]\b',
    r'\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b',

    // F-word (homophobic slur) and variants
    r'\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b',
    r'\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b',

    // K-word (anti-Jewish slur)
    r'\bk[i1][k]+[e3]?s?\b',

    // C-word (racial slur against Asian people)
    r'\bch[i1]n[k]+s?\b',

    // S-word (anti-Hispanic slur)
    r'\bsp[i1][ck]+s?\b',

    // W-word (racial slur)
    r'\bw[e3][t7]b[a@]ck+s?\b',

    // R-word (ableist slur)
    r'\br[e3]t[a@]rd+s?\b',

    // T-word (transphobic slur)
    r'\btr[a@4]nn[yie]+s?\b',

    // Direct death/violence threats
    r"\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b",
    r'\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b',
  ];

  // Compiled once, case-insensitively, on first use.
  static final List<RegExp> _hardBlockPatterns = [
    for (final source in _hardBlockSources)
      RegExp(source, caseSensitive: false),
  ];
}
|
||||
Loading…
Reference in a new issue