Fix content filter: remove word boundaries to catch concatenated slurs

This commit is contained in:
Patrick Britton 2026-02-06 12:01:19 -06:00
parent b5002c1ce4
commit 70fa1dddca
2 changed files with 30 additions and 30 deletions

View file

@ -53,36 +53,36 @@ func (cf *ContentFilter) buildPatterns() {
// Hard-blocked slurs — these NEVER get posted.
// Patterns use (?i) for case-insensitive and flexible char matching.
entries := []entry{
// N-word and variants
{`(?i)\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b`, "slur", "hard"},
{`(?i)\bn[i1!|l][gq9]+[aA@]\b`, "slur", "hard"},
{`(?i)\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b`, "slur", "hard"},
// N-word and variants (no \b — catches concatenated slurs)
{`(?i)n[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?`, "slur", "hard"},
{`(?i)n[i1!|l][gq9]+[aA@]`, "slur", "hard"},
{`(?i)n\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?`, "slur", "hard"},
// F-word (homophobic slur) and variants
{`(?i)\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b`, "slur", "hard"},
{`(?i)\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b`, "slur", "hard"},
{`(?i)f[a@4][gq9][gq9]?[o0]?[t7]?s?`, "slur", "hard"},
{`(?i)f\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?`, "slur", "hard"},
// K-word (anti-Jewish slur)
{`(?i)\bk[i1][k]+[e3]?s?\b`, "slur", "hard"},
{`(?i)k[i1][k]+[e3]?s?`, "slur", "hard"},
// C-word (racial slur against Asian people)
{`(?i)\bch[i1]n[k]+s?\b`, "slur", "hard"},
{`(?i)ch[i1]n[k]+s?`, "slur", "hard"},
// S-word (anti-Hispanic slur)
{`(?i)\bsp[i1][ck]+s?\b`, "slur", "hard"},
{`(?i)sp[i1][ck]+s?`, "slur", "hard"},
// W-word (racial slur)
{`(?i)\bw[e3][t7]b[a@]ck+s?\b`, "slur", "hard"},
{`(?i)w[e3][t7]b[a@]ck+s?`, "slur", "hard"},
// R-word (ableist slur)
{`(?i)\br[e3]t[a@]rd+s?\b`, "slur", "hard"},
{`(?i)r[e3]t[a@]rd+s?`, "slur", "hard"},
// T-word (transphobic slur)
{`(?i)\btr[a@4]nn[yie]+s?\b`, "slur", "hard"},
{`(?i)tr[a@4]nn[yie]+s?`, "slur", "hard"},
// Direct death/violence threats
{`(?i)\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b`, "threat", "hard"},
{`(?i)\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b`, "threat", "hard"},
{`(?i)(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)`, "threat", "hard"},
{`(?i)(kill|murder|shoot|stab|rape) (you|them|him|her|all)`, "threat", "hard"},
}
cf.patterns = make([]*blockedPattern, 0, len(entries))

View file

@ -40,35 +40,35 @@ class ContentFilter {
// Hard-blocked patterns — these match slurs and direct threats.
// Mirrors the server-side patterns in content_filter.go.
static final List<RegExp> _hardBlockPatterns = [
// N-word and variants
RegExp(r'\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b', caseSensitive: false),
RegExp(r'\bn[i1!|l][gq9]+[aA@]\b', caseSensitive: false),
RegExp(r'\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b', caseSensitive: false),
// N-word and variants (no \b catches concatenated slurs like 'niggerfag')
RegExp(r'n[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?', caseSensitive: false),
RegExp(r'n[i1!|l][gq9]+[aA@]', caseSensitive: false),
RegExp(r'n\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?', caseSensitive: false),
// F-word (homophobic slur) and variants
RegExp(r'\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b', caseSensitive: false),
RegExp(r'\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b', caseSensitive: false),
RegExp(r'f[a@4][gq9][gq9]?[o0]?[t7]?s?', caseSensitive: false),
RegExp(r'f\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?', caseSensitive: false),
// K-word (anti-Jewish slur)
RegExp(r'\bk[i1][k]+[e3]?s?\b', caseSensitive: false),
RegExp(r'k[i1][k]+[e3]?s?', caseSensitive: false),
// C-word (racial slur against Asian people)
RegExp(r'\bch[i1]n[k]+s?\b', caseSensitive: false),
RegExp(r'ch[i1]n[k]+s?', caseSensitive: false),
// S-word (anti-Hispanic slur)
RegExp(r'\bsp[i1][ck]+s?\b', caseSensitive: false),
RegExp(r'sp[i1][ck]+s?', caseSensitive: false),
// W-word (racial slur)
RegExp(r'\bw[e3][t7]b[a@]ck+s?\b', caseSensitive: false),
RegExp(r'w[e3][t7]b[a@]ck+s?', caseSensitive: false),
// R-word (ableist slur)
RegExp(r'\br[e3]t[a@]rd+s?\b', caseSensitive: false),
RegExp(r'r[e3]t[a@]rd+s?', caseSensitive: false),
// T-word (transphobic slur)
RegExp(r'\btr[a@4]nn[yie]+s?\b', caseSensitive: false),
RegExp(r'tr[a@4]nn[yie]+s?', caseSensitive: false),
// Direct death/violence threats
RegExp(r"\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b", caseSensitive: false),
RegExp(r'\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b', caseSensitive: false),
// Direct death/violence threats (\b removed here as well, so these also match inside longer words)
RegExp(r"(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)", caseSensitive: false),
RegExp(r'(kill|murder|shoot|stab|rape) (you|them|him|her|all)', caseSensitive: false),
];
}