Fix content filter: remove word boundaries to catch concatenated slurs
This commit is contained in:
parent
b5002c1ce4
commit
70fa1dddca
|
|
@ -53,36 +53,36 @@ func (cf *ContentFilter) buildPatterns() {
|
||||||
// Hard-blocked slurs — these NEVER get posted.
|
// Hard-blocked slurs — these NEVER get posted.
|
||||||
// Patterns use (?i) for case-insensitive and flexible char matching.
|
// Patterns use (?i) for case-insensitive and flexible char matching.
|
||||||
entries := []entry{
|
entries := []entry{
|
||||||
// N-word and variants
|
// N-word and variants (no \b — catches concatenated slurs)
|
||||||
{`(?i)\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b`, "slur", "hard"},
|
{`(?i)n[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?`, "slur", "hard"},
|
||||||
{`(?i)\bn[i1!|l][gq9]+[aA@]\b`, "slur", "hard"},
|
{`(?i)n[i1!|l][gq9]+[aA@]`, "slur", "hard"},
|
||||||
{`(?i)\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b`, "slur", "hard"},
|
{`(?i)n\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?`, "slur", "hard"},
|
||||||
|
|
||||||
// F-word (homophobic slur) and variants
|
// F-word (homophobic slur) and variants
|
||||||
{`(?i)\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b`, "slur", "hard"},
|
{`(?i)f[a@4][gq9][gq9]?[o0]?[t7]?s?`, "slur", "hard"},
|
||||||
{`(?i)\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b`, "slur", "hard"},
|
{`(?i)f\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?`, "slur", "hard"},
|
||||||
|
|
||||||
// K-word (anti-Jewish slur)
|
// K-word (anti-Jewish slur)
|
||||||
{`(?i)\bk[i1][k]+[e3]?s?\b`, "slur", "hard"},
|
{`(?i)k[i1][k]+[e3]?s?`, "slur", "hard"},
|
||||||
|
|
||||||
// C-word (racial slur against Asian people)
|
// C-word (racial slur against Asian people)
|
||||||
{`(?i)\bch[i1]n[k]+s?\b`, "slur", "hard"},
|
{`(?i)ch[i1]n[k]+s?`, "slur", "hard"},
|
||||||
|
|
||||||
// S-word (anti-Hispanic slur)
|
// S-word (anti-Hispanic slur)
|
||||||
{`(?i)\bsp[i1][ck]+s?\b`, "slur", "hard"},
|
{`(?i)sp[i1][ck]+s?`, "slur", "hard"},
|
||||||
|
|
||||||
// W-word (racial slur)
|
// W-word (racial slur)
|
||||||
{`(?i)\bw[e3][t7]b[a@]ck+s?\b`, "slur", "hard"},
|
{`(?i)w[e3][t7]b[a@]ck+s?`, "slur", "hard"},
|
||||||
|
|
||||||
// R-word (ableist slur)
|
// R-word (ableist slur)
|
||||||
{`(?i)\br[e3]t[a@]rd+s?\b`, "slur", "hard"},
|
{`(?i)r[e3]t[a@]rd+s?`, "slur", "hard"},
|
||||||
|
|
||||||
// T-word (transphobic slur)
|
// T-word (transphobic slur)
|
||||||
{`(?i)\btr[a@4]nn[yie]+s?\b`, "slur", "hard"},
|
{`(?i)tr[a@4]nn[yie]+s?`, "slur", "hard"},
|
||||||
|
|
||||||
// Direct death/violence threats
|
// Direct death/violence threats
|
||||||
{`(?i)\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b`, "threat", "hard"},
|
{`(?i)(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)`, "threat", "hard"},
|
||||||
{`(?i)\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b`, "threat", "hard"},
|
{`(?i)(kill|murder|shoot|stab|rape) (you|them|him|her|all)`, "threat", "hard"},
|
||||||
}
|
}
|
||||||
|
|
||||||
cf.patterns = make([]*blockedPattern, 0, len(entries))
|
cf.patterns = make([]*blockedPattern, 0, len(entries))
|
||||||
|
|
@ -192,7 +192,7 @@ func normalizeText(text string) string {
|
||||||
text = strings.ReplaceAll(text, "\u200b", "") // zero-width space
|
text = strings.ReplaceAll(text, "\u200b", "") // zero-width space
|
||||||
text = strings.ReplaceAll(text, "\u200c", "") // zero-width non-joiner
|
text = strings.ReplaceAll(text, "\u200c", "") // zero-width non-joiner
|
||||||
text = strings.ReplaceAll(text, "\u200d", "") // zero-width joiner
|
text = strings.ReplaceAll(text, "\u200d", "") // zero-width joiner
|
||||||
text = strings.ReplaceAll(text, "\ufeff", "") // BOM
|
text = strings.ReplaceAll(text, "\ufeff", "") // BOM
|
||||||
|
|
||||||
// Remove common separator characters used to evade filters
|
// Remove common separator characters used to evade filters
|
||||||
for _, ch := range []string{".", "-", "_", "*", "|"} {
|
for _, ch := range []string{".", "-", "_", "*", "|"} {
|
||||||
|
|
|
||||||
|
|
@ -40,35 +40,35 @@ class ContentFilter {
|
||||||
// Hard-blocked patterns — these match slurs and direct threats.
|
// Hard-blocked patterns — these match slurs and direct threats.
|
||||||
// Mirrors the server-side patterns in content_filter.go.
|
// Mirrors the server-side patterns in content_filter.go.
|
||||||
static final List<RegExp> _hardBlockPatterns = [
|
static final List<RegExp> _hardBlockPatterns = [
|
||||||
// N-word and variants
|
// N-word and variants (no \b — catches concatenated slurs like 'niggerfag')
|
||||||
RegExp(r'\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b', caseSensitive: false),
|
RegExp(r'n[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?', caseSensitive: false),
|
||||||
RegExp(r'\bn[i1!|l][gq9]+[aA@]\b', caseSensitive: false),
|
RegExp(r'n[i1!|l][gq9]+[aA@]', caseSensitive: false),
|
||||||
RegExp(r'\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b', caseSensitive: false),
|
RegExp(r'n\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?', caseSensitive: false),
|
||||||
|
|
||||||
// F-word (homophobic slur) and variants
|
// F-word (homophobic slur) and variants
|
||||||
RegExp(r'\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b', caseSensitive: false),
|
RegExp(r'f[a@4][gq9][gq9]?[o0]?[t7]?s?', caseSensitive: false),
|
||||||
RegExp(r'\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b', caseSensitive: false),
|
RegExp(r'f\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?', caseSensitive: false),
|
||||||
|
|
||||||
// K-word (anti-Jewish slur)
|
// K-word (anti-Jewish slur)
|
||||||
RegExp(r'\bk[i1][k]+[e3]?s?\b', caseSensitive: false),
|
RegExp(r'k[i1][k]+[e3]?s?', caseSensitive: false),
|
||||||
|
|
||||||
// C-word (racial slur against Asian people)
|
// C-word (racial slur against Asian people)
|
||||||
RegExp(r'\bch[i1]n[k]+s?\b', caseSensitive: false),
|
RegExp(r'ch[i1]n[k]+s?', caseSensitive: false),
|
||||||
|
|
||||||
// S-word (anti-Hispanic slur)
|
// S-word (anti-Hispanic slur)
|
||||||
RegExp(r'\bsp[i1][ck]+s?\b', caseSensitive: false),
|
RegExp(r'sp[i1][ck]+s?', caseSensitive: false),
|
||||||
|
|
||||||
// W-word (racial slur)
|
// W-word (racial slur)
|
||||||
RegExp(r'\bw[e3][t7]b[a@]ck+s?\b', caseSensitive: false),
|
RegExp(r'w[e3][t7]b[a@]ck+s?', caseSensitive: false),
|
||||||
|
|
||||||
// R-word (ableist slur)
|
// R-word (ableist slur)
|
||||||
RegExp(r'\br[e3]t[a@]rd+s?\b', caseSensitive: false),
|
RegExp(r'r[e3]t[a@]rd+s?', caseSensitive: false),
|
||||||
|
|
||||||
// T-word (transphobic slur)
|
// T-word (transphobic slur)
|
||||||
RegExp(r'\btr[a@4]nn[yie]+s?\b', caseSensitive: false),
|
RegExp(r'tr[a@4]nn[yie]+s?', caseSensitive: false),
|
||||||
|
|
||||||
// Direct death/violence threats
|
// Direct death/violence threats (NOTE: \b anchors are removed from these patterns too, so they also match inside longer words)
|
||||||
RegExp(r"\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b", caseSensitive: false),
|
RegExp(r"(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)", caseSensitive: false),
|
||||||
RegExp(r'\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b', caseSensitive: false),
|
RegExp(r'(kill|murder|shoot|stab|rape) (you|them|him|her|all)', caseSensitive: false),
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue