Fix content filter: remove word boundaries to catch concatenated slurs
This commit is contained in:
parent
b5002c1ce4
commit
70fa1dddca
|
|
@ -53,36 +53,36 @@ func (cf *ContentFilter) buildPatterns() {
|
|||
// Hard-blocked slurs — these NEVER get posted.
|
||||
// Patterns use (?i) for case-insensitive and flexible char matching.
|
||||
entries := []entry{
|
||||
// N-word and variants
|
||||
{`(?i)\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b`, "slur", "hard"},
|
||||
{`(?i)\bn[i1!|l][gq9]+[aA@]\b`, "slur", "hard"},
|
||||
{`(?i)\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b`, "slur", "hard"},
|
||||
// N-word and variants (no \b — catches concatenated slurs)
|
||||
{`(?i)n[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?`, "slur", "hard"},
|
||||
{`(?i)n[i1!|l][gq9]+[aA@]`, "slur", "hard"},
|
||||
{`(?i)n\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?`, "slur", "hard"},
|
||||
|
||||
// F-word (homophobic slur) and variants
|
||||
{`(?i)\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b`, "slur", "hard"},
|
||||
{`(?i)\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b`, "slur", "hard"},
|
||||
{`(?i)f[a@4][gq9][gq9]?[o0]?[t7]?s?`, "slur", "hard"},
|
||||
{`(?i)f\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?`, "slur", "hard"},
|
||||
|
||||
// K-word (anti-Jewish slur)
|
||||
{`(?i)\bk[i1][k]+[e3]?s?\b`, "slur", "hard"},
|
||||
{`(?i)k[i1][k]+[e3]?s?`, "slur", "hard"},
|
||||
|
||||
// C-word (racial slur against Asian people)
|
||||
{`(?i)\bch[i1]n[k]+s?\b`, "slur", "hard"},
|
||||
{`(?i)ch[i1]n[k]+s?`, "slur", "hard"},
|
||||
|
||||
// S-word (anti-Hispanic slur)
|
||||
{`(?i)\bsp[i1][ck]+s?\b`, "slur", "hard"},
|
||||
{`(?i)sp[i1][ck]+s?`, "slur", "hard"},
|
||||
|
||||
// W-word (racial slur)
|
||||
{`(?i)\bw[e3][t7]b[a@]ck+s?\b`, "slur", "hard"},
|
||||
{`(?i)w[e3][t7]b[a@]ck+s?`, "slur", "hard"},
|
||||
|
||||
// R-word (ableist slur)
|
||||
{`(?i)\br[e3]t[a@]rd+s?\b`, "slur", "hard"},
|
||||
{`(?i)r[e3]t[a@]rd+s?`, "slur", "hard"},
|
||||
|
||||
// T-word (transphobic slur)
|
||||
{`(?i)\btr[a@4]nn[yie]+s?\b`, "slur", "hard"},
|
||||
{`(?i)tr[a@4]nn[yie]+s?`, "slur", "hard"},
|
||||
|
||||
// Direct death/violence threats
|
||||
{`(?i)\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b`, "threat", "hard"},
|
||||
{`(?i)\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b`, "threat", "hard"},
|
||||
{`(?i)(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)`, "threat", "hard"},
|
||||
{`(?i)(kill|murder|shoot|stab|rape) (you|them|him|her|all)`, "threat", "hard"},
|
||||
}
|
||||
|
||||
cf.patterns = make([]*blockedPattern, 0, len(entries))
|
||||
|
|
@ -192,7 +192,7 @@ func normalizeText(text string) string {
|
|||
text = strings.ReplaceAll(text, "\u200b", "") // zero-width space
|
||||
text = strings.ReplaceAll(text, "\u200c", "") // zero-width non-joiner
|
||||
text = strings.ReplaceAll(text, "\u200d", "") // zero-width joiner
|
||||
text = strings.ReplaceAll(text, "\ufeff", "") // BOM
|
||||
text = strings.ReplaceAll(text, "\ufeff", "") // BOM
|
||||
|
||||
// Remove common separator characters used to evade filters
|
||||
for _, ch := range []string{".", "-", "_", "*", "|"} {
|
||||
|
|
|
|||
|
|
@ -40,35 +40,35 @@ class ContentFilter {
|
|||
// Hard-blocked patterns — these match slurs and direct threats.
|
||||
// Mirrors the server-side patterns in content_filter.go.
|
||||
static final List<RegExp> _hardBlockPatterns = [
|
||||
// N-word and variants
|
||||
RegExp(r'\bn[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?\b', caseSensitive: false),
|
||||
RegExp(r'\bn[i1!|l][gq9]+[aA@]\b', caseSensitive: false),
|
||||
RegExp(r'\bn\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?\b', caseSensitive: false),
|
||||
// N-word and variants (no \b — catches concatenated slurs like 'niggerfag')
|
||||
RegExp(r'n[i1!|l][gq9][gq9]+[e3a@]?[r0d]?s?', caseSensitive: false),
|
||||
RegExp(r'n[i1!|l][gq9]+[aA@]', caseSensitive: false),
|
||||
RegExp(r'n\s*[i1!]\s*[gq9]\s*[gq9]\s*[e3a]?\s*[r0]?', caseSensitive: false),
|
||||
|
||||
// F-word (homophobic slur) and variants
|
||||
RegExp(r'\bf[a@4][gq9][gq9]?[o0]?[t7]?s?\b', caseSensitive: false),
|
||||
RegExp(r'\bf\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?\b', caseSensitive: false),
|
||||
RegExp(r'f[a@4][gq9][gq9]?[o0]?[t7]?s?', caseSensitive: false),
|
||||
RegExp(r'f\s*[a@4]\s*[gq9]\s*[gq9]?\s*[o0]?\s*[t7]?', caseSensitive: false),
|
||||
|
||||
// K-word (anti-Jewish slur)
|
||||
RegExp(r'\bk[i1][k]+[e3]?s?\b', caseSensitive: false),
|
||||
RegExp(r'k[i1][k]+[e3]?s?', caseSensitive: false),
|
||||
|
||||
// C-word (racial slur against Asian people)
|
||||
RegExp(r'\bch[i1]n[k]+s?\b', caseSensitive: false),
|
||||
RegExp(r'ch[i1]n[k]+s?', caseSensitive: false),
|
||||
|
||||
// S-word (anti-Hispanic slur)
|
||||
RegExp(r'\bsp[i1][ck]+s?\b', caseSensitive: false),
|
||||
RegExp(r'sp[i1][ck]+s?', caseSensitive: false),
|
||||
|
||||
// W-word (racial slur)
|
||||
RegExp(r'\bw[e3][t7]b[a@]ck+s?\b', caseSensitive: false),
|
||||
RegExp(r'w[e3][t7]b[a@]ck+s?', caseSensitive: false),
|
||||
|
||||
// R-word (ableist slur)
|
||||
RegExp(r'\br[e3]t[a@]rd+s?\b', caseSensitive: false),
|
||||
RegExp(r'r[e3]t[a@]rd+s?', caseSensitive: false),
|
||||
|
||||
// T-word (transphobic slur)
|
||||
RegExp(r'\btr[a@4]nn[yie]+s?\b', caseSensitive: false),
|
||||
RegExp(r'tr[a@4]nn[yie]+s?', caseSensitive: false),
|
||||
|
||||
// Direct death/violence threats
|
||||
RegExp(r"\b(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)\b", caseSensitive: false),
|
||||
RegExp(r'\b(kill|murder|shoot|stab|rape) (you|them|him|her|all)\b', caseSensitive: false),
|
||||
// Direct death/violence threats (no \b — also matches when run together with adjacent text)
|
||||
RegExp(r"(i('?m| am) go(ing|nna)|i('?ll| will)) (to )?(kill|murder|shoot|stab|rape)", caseSensitive: false),
|
||||
RegExp(r'(kill|murder|shoot|stab|rape) (you|them|him|her|all)', caseSensitive: false),
|
||||
];
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue