From 70fa1dddca1bdfbdf0e30ff8d2a0e1ac91ded789 Mon Sep 17 00:00:00 2001 From: Patrick Britton Date: Fri, 6 Feb 2026 12:01:19 -0600 Subject: [PATCH] Fix content filter: remove word boundaries to catch concatenated slurs --- .../internal/services/content_filter.go | 30 +++++++++---------- sojorn_app/lib/services/content_filter.dart | 30 +++++++++---------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/go-backend/internal/services/content_filter.go b/go-backend/internal/services/content_filter.go index a56fca9..eef6299 100644 --- a/go-backend/internal/services/content_filter.go +++ b/go-backend/internal/services/content_filter.go @@ -53,36 +53,36 @@ func (cf *ContentFilter) buildPatterns() { // Hard-blocked slurs — these NEVER get posted. // Patterns use (?i) for case-insensitive and flexible char matching. entries := []entry{ - // N-word and variants - {`(?i)\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b`, "slur", "hard"}, - {`(?i)\bn[i1!|l][gq9]+[aA@]\b`, "slur", "hard"}, - {`(?i)\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b`, "slur", "hard"}, + // N-word and variants (no \b — catches concatenated slurs) + {`(?i)n[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?`, "slur", "hard"}, + {`(?i)n[i1!|l][gq9]+[aA@]`, "slur", "hard"}, + {`(?i)n\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?`, "slur", "hard"}, // F-word (homophobic slur) and variants - {`(?i)\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b`, "slur", "hard"}, - {`(?i)\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b`, "slur", "hard"}, + {`(?i)f[a@4][gq9][gq9]?[o0]?[t7]?s?`, "slur", "hard"}, + {`(?i)f\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?`, "slur", "hard"}, // K-word (anti-Jewish slur) - {`(?i)\bk[i1][k]+[e3]?s?\b`, "slur", "hard"}, + {`(?i)k[i1][k]+[e3]?s?`, "slur", "hard"}, // C-word (racial slur against Asian people) - {`(?i)\bch[i1]n[k]+s?\b`, "slur", "hard"}, + {`(?i)ch[i1]n[k]+s?`, "slur", "hard"}, // S-word (anti-Hispanic slur) - {`(?i)\bsp[i1][ck]+s?\b`, "slur", "hard"}, + {`(?i)sp[i1][ck]+s?`, "slur", "hard"}, // W-word (racial 
slur) - {`(?i)\bw[e3][t7]b[a@]ck+s?\b`, "slur", "hard"}, + {`(?i)w[e3][t7]b[a@]ck+s?`, "slur", "hard"}, // R-word (ableist slur) - {`(?i)\br[e3]t[a@]rd+s?\b`, "slur", "hard"}, + {`(?i)r[e3]t[a@]rd+s?`, "slur", "hard"}, // T-word (transphobic slur) - {`(?i)\btr[a@4]nn[yie]+s?\b`, "slur", "hard"}, + {`(?i)tr[a@4]nn[yie]+s?`, "slur", "hard"}, // Direct death/violence threats - {`(?i)\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b`, "threat", "hard"}, - {`(?i)\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b`, "threat", "hard"}, + {`(?i)(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)`, "threat", "hard"}, + {`(?i)(kill|murder|shoot|stab|rape) (you|them|him|her|all)`, "threat", "hard"}, } cf.patterns = make([]*blockedPattern, 0, len(entries)) @@ -192,7 +192,7 @@ func normalizeText(text string) string { text = strings.ReplaceAll(text, "\u200b", "") // zero-width space text = strings.ReplaceAll(text, "\u200c", "") // zero-width non-joiner text = strings.ReplaceAll(text, "\u200d", "") // zero-width joiner - text = strings.ReplaceAll(text, "\ufeff", "") // BOM + text = strings.ReplaceAll(text, "\ufeff", "") // BOM // Remove common separator characters used to evade filters for _, ch := range []string{".", "-", "_", "*", "|"} { diff --git a/sojorn_app/lib/services/content_filter.dart b/sojorn_app/lib/services/content_filter.dart index d0a3de5..b9d9da4 100644 --- a/sojorn_app/lib/services/content_filter.dart +++ b/sojorn_app/lib/services/content_filter.dart @@ -40,35 +40,35 @@ class ContentFilter { // Hard-blocked patterns — these match slurs and direct threats. // Mirrors the server-side patterns in content_filter.go. 
static final List _hardBlockPatterns = [ - // N-word and variants - RegExp(r'\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b', caseSensitive: false), - RegExp(r'\bn[i1!|l][gq9]+[aA@]\b', caseSensitive: false), - RegExp(r'\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b', caseSensitive: false), + // N-word and variants (no \b — catches concatenated slurs like 'niggerfag') + RegExp(r'n[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?', caseSensitive: false), + RegExp(r'n[i1!|l][gq9]+[aA@]', caseSensitive: false), + RegExp(r'n\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?', caseSensitive: false), // F-word (homophobic slur) and variants - RegExp(r'\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b', caseSensitive: false), - RegExp(r'\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b', caseSensitive: false), + RegExp(r'f[a@4][gq9][gq9]?[o0]?[t7]?s?', caseSensitive: false), + RegExp(r'f\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?', caseSensitive: false), // K-word (anti-Jewish slur) - RegExp(r'\bk[i1][k]+[e3]?s?\b', caseSensitive: false), + RegExp(r'k[i1][k]+[e3]?s?', caseSensitive: false), // C-word (racial slur against Asian people) - RegExp(r'\bch[i1]n[k]+s?\b', caseSensitive: false), + RegExp(r'ch[i1]n[k]+s?', caseSensitive: false), // S-word (anti-Hispanic slur) - RegExp(r'\bsp[i1][ck]+s?\b', caseSensitive: false), + RegExp(r'sp[i1][ck]+s?', caseSensitive: false), // W-word (racial slur) - RegExp(r'\bw[e3][t7]b[a@]ck+s?\b', caseSensitive: false), + RegExp(r'w[e3][t7]b[a@]ck+s?', caseSensitive: false), // R-word (ableist slur) - RegExp(r'\br[e3]t[a@]rd+s?\b', caseSensitive: false), + RegExp(r'r[e3]t[a@]rd+s?', caseSensitive: false), // T-word (transphobic slur) - RegExp(r'\btr[a@4]nn[yie]+s?\b', caseSensitive: false), + RegExp(r'tr[a@4]nn[yie]+s?', caseSensitive: false), - // Direct death/violence threats - RegExp(r"\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b", caseSensitive: false), - RegExp(r'\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b', caseSensitive: false), + // Direct 
death/violence threats (\b anchors removed here as well, matching the server-side patterns) + RegExp(r"(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)", caseSensitive: false), + RegExp(r'(kill|murder|shoot|stab|rape) (you|them|him|her|all)', caseSensitive: false), ]; }