336 lines
11 KiB
Go
336 lines
11 KiB
Go
package services
|
|
|
|
import (
|
|
"context"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/jackc/pgx/v5/pgxpool"
|
|
)
|
|
|
|
// UsernameViolation identifies which rule, if any, a username or display name
// failed during validation.
type UsernameViolation int

// Violation codes. Values are assigned in declaration order starting at zero.
const (
	// UsernameOK means no rule was violated.
	UsernameOK = UsernameViolation(iota)
	// UsernameReserved means the name is reserved.
	UsernameReserved
	// UsernameInappropriate means the name matched the inappropriate-content filter.
	UsernameInappropriate
	// UsernameInvalidFormat means the name failed a length or character check.
	UsernameInvalidFormat
)
|
|
|
|
type UsernameCheckResult struct {
|
|
Violation UsernameViolation
|
|
Message string
|
|
}
|
|
|
|
// ValidateUsernameWithDB checks a handle against reserved names (hardcoded + DB),
|
|
// inappropriate words, and format rules.
|
|
func ValidateUsernameWithDB(ctx context.Context, pool *pgxpool.Pool, handle string) UsernameCheckResult {
|
|
result := ValidateUsername(handle)
|
|
if result.Violation != UsernameOK {
|
|
return result
|
|
}
|
|
|
|
// Also check DB reserved_usernames table
|
|
if pool != nil {
|
|
var count int
|
|
err := pool.QueryRow(ctx, `SELECT COUNT(*) FROM reserved_usernames WHERE username = $1`, strings.ToLower(strings.TrimSpace(handle))).Scan(&count)
|
|
if err == nil && count > 0 {
|
|
return UsernameCheckResult{
|
|
UsernameReserved,
|
|
"This username is reserved. If you officially represent this brand, company, or public figure, you can submit a verification request at support@sojorn.net to claim it.",
|
|
}
|
|
}
|
|
}
|
|
|
|
return UsernameCheckResult{UsernameOK, ""}
|
|
}
|
|
|
|
// ValidateUsername checks a handle against reserved names, inappropriate words,
|
|
// and format rules. Returns a result with a user-facing message.
|
|
func ValidateUsername(handle string) UsernameCheckResult {
|
|
h := strings.ToLower(strings.TrimSpace(handle))
|
|
|
|
// Format check
|
|
if len(h) < 3 || len(h) > 30 {
|
|
return UsernameCheckResult{UsernameInvalidFormat, "Username must be between 3 and 30 characters."}
|
|
}
|
|
if !validHandleRegex.MatchString(h) {
|
|
return UsernameCheckResult{UsernameInvalidFormat, "Username can only contain letters, numbers, underscores, and periods."}
|
|
}
|
|
|
|
// Reserved check
|
|
if isReserved(h) {
|
|
return UsernameCheckResult{
|
|
UsernameReserved,
|
|
"This username is reserved. If you officially represent this brand, company, or public figure, you can submit a verification request at support@sojorn.net to claim it.",
|
|
}
|
|
}
|
|
|
|
// Inappropriate check
|
|
if reason := isInappropriate(h); reason != "" {
|
|
return UsernameCheckResult{UsernameInappropriate, "This username is not allowed: " + reason}
|
|
}
|
|
|
|
return UsernameCheckResult{UsernameOK, ""}
|
|
}
|
|
|
|
// ValidateDisplayName checks a display name for inappropriate content.
|
|
func ValidateDisplayName(name string) UsernameCheckResult {
|
|
n := strings.ToLower(strings.TrimSpace(name))
|
|
if len(n) == 0 || len(n) > 50 {
|
|
return UsernameCheckResult{UsernameInvalidFormat, "Display name must be between 1 and 50 characters."}
|
|
}
|
|
if reason := isInappropriate(n); reason != "" {
|
|
return UsernameCheckResult{UsernameInappropriate, "This display name is not allowed: " + reason}
|
|
}
|
|
return UsernameCheckResult{UsernameOK, ""}
|
|
}
|
|
|
|
// validHandleRegex matches a non-empty string made up solely of lower-case
// ASCII letters, digits, underscores, and periods.
var validHandleRegex = regexp.MustCompile(`^[a-z0-9_.]+$`)
|
|
|
|
// -------------------------------------------------------------------
|
|
// Reserved usernames
|
|
// -------------------------------------------------------------------
|
|
|
|
func isReserved(h string) bool {
|
|
// Exact match
|
|
if reservedSet[h] {
|
|
return true
|
|
}
|
|
// Prefix match (e.g. "sojorn_anything", "admin_anything")
|
|
for _, prefix := range reservedPrefixes {
|
|
if strings.HasPrefix(h, prefix) {
|
|
return true
|
|
}
|
|
}
|
|
// Contains match for brand names that shouldn't appear even as substrings
|
|
for _, substr := range reservedSubstrings {
|
|
if strings.Contains(h, substr) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// platformReserved lists platform terms, system accounts, and roles that are
// reserved as exact handles. init copies every entry into reservedSet.
var platformReserved = []string{
	// Platform name, moderation, and support roles.
	"sojorn", "admin", "administrator", "moderator", "mod",
	"support", "help", "helpdesk", "system", "official",
	"root", "superuser", "staff", "team", "security",

	// Mailbox-style and automated accounts.
	"abuse", "postmaster", "webmaster", "info", "contact",
	"noreply", "no_reply", "mailer", "daemon", "bot",

	// Company roles and departments.
	"api", "dev", "developer", "ceo", "cto", "cfo", "coo",
	"founder", "cofounder", "intern", "hr",
	"legal", "compliance", "privacy", "terms",
	"news", "press", "media", "blog", "status",
	"feedback", "report", "bug", "feature",
	"billing", "payment", "sales", "marketing",

	// Mention keywords and placeholder values.
	"everyone", "all", "here", "channel",
	"null", "undefined", "anonymous", "unknown",
	"test", "testing", "demo", "example",

	// Words that look like application pages or features.
	"signup", "signin", "login", "logout", "register",
	"settings", "account", "profile", "dashboard",
	"home", "feed", "explore", "discover", "search",
	"notification", "notifications", "message", "messages",
	"chat", "dm", "dms", "inbox", "outbox",
	"verified", "verification", "verify",
	"beacon", "beacons", "quip", "quips",
}
|
|
|
|
// techCompanyReserved lists major tech companies and social platforms that are
// reserved as exact handles. init copies every entry into reservedSet.
var techCompanyReserved = []string{
	// Large platforms and social networks.
	"google", "apple", "microsoft", "amazon", "meta",
	"facebook", "instagram", "twitter", "tiktok", "snapchat",
	"linkedin", "reddit", "pinterest", "youtube", "twitch",
	"discord", "telegram", "whatsapp", "signal",

	// Streaming and AI.
	"netflix", "spotify", "hulu", "disney", "disneyplus",
	"openai", "chatgpt", "anthropic", "claude",

	// Hardware, aerospace, and mobility.
	"nvidia", "amd", "intel", "samsung", "sony",
	"tesla", "spacex", "nasa", "boeing", "airbus",
	"uber", "lyft", "airbnb", "doordash", "grubhub",

	// Payments and finance.
	"paypal", "stripe", "venmo", "cashapp", "zelle",
	"coinbase", "binance", "robinhood", "fidelity",

	// Developer, collaboration, and business software.
	"github", "gitlab", "stackoverflow", "atlassian",
	"slack", "zoom", "teams", "webex",
	"shopify", "squarespace", "wordpress", "wix",
	"dropbox", "icloud", "onedrive",
	"oracle", "ibm", "salesforce", "adobe", "canva",
}
|
|
|
|
// brandReserved lists major brands and corporations that are reserved as exact
// handles. init copies every entry into reservedSet.
var brandReserved = []string{
	// Sportswear, food and drink, and retail.
	"nike", "adidas", "puma", "reebok", "underarmour",
	"cocacola", "coca_cola", "pepsi", "starbucks", "mcdonalds",
	"walmart", "target", "costco", "kroger", "wholefoods",

	// Car makers.
	"bmw", "mercedes", "audi", "porsche", "ferrari",
	"lamborghini", "ford", "chevrolet", "toyota", "honda",

	// Luxury goods.
	"gucci", "louisvuitton", "chanel", "prada", "hermes",
	"rolex", "cartier", "tiffany", "burberry",

	// Sports leagues and news outlets.
	"nfl", "nba", "mlb", "nhl", "mls", "fifa", "ufc",
	"espn", "cnn", "bbc", "foxnews", "msnbc", "nytimes",
	"washingtonpost", "wsj", "reuters", "apnews",

	// Entertainment and gaming.
	"marvel", "dccomics", "nintendo", "playstation", "xbox",
	"paramount", "warner", "universal",
}
|
|
|
|
// publicFigureReserved lists public figures, politicians, and notable people
// whose names are reserved as exact handles. init copies every entry into
// reservedSet. Several names appear both with and without an underscore.
var publicFigureReserved = []string{
	// Technology executives.
	"elonmusk", "elon_musk", "jeffbezos", "jeff_bezos",
	"markzuckerberg", "mark_zuckerberg", "zuckerberg",
	"timcook", "tim_cook", "billgates", "bill_gates",
	"satyanadella", "sundarpichai", "samaltman",

	// Politicians.
	"joebiden", "joe_biden", "donaldtrump", "donald_trump",
	"barackobama", "barack_obama", "kamalaharris",

	// Musicians and celebrities.
	"taylorswift", "taylor_swift", "beyonce", "rihanna",
	"drake", "kanyewest", "kanye_west", "ye",
	"kimkardashian", "kim_kardashian", "kyliejenner",

	// Athletes and actors.
	"cristiano", "ronaldo", "messi", "lebronjames", "lebron_james",
	"therock", "the_rock", "dwaynejohnson",

	// Online creators and hosts.
	// NOTE(review): "mrbeaast" looks like a typo or an intentional
	// look-alike of "mrbeast" — confirm before removing.
	"mrbeaast", "mrbeast", "pewdiepie", "ninja",
	"joerogan", "joe_rogan", "oprah",

	// Religious figures.
	"pope", "popefrancis", "dalailama",
}
|
|
|
|
// reservedPrefixes lists handle prefixes that isReserved rejects. Each reserved
// word appears twice: once followed by an underscore and once by a period.
var reservedPrefixes = []string{
	"sojorn_", "sojorn.",
	"official_", "official.",
	"admin_", "admin.",
	"mod_", "mod.",
	"support_", "support.",
	"team_", "team.",
	"staff_", "staff.",
	"system_", "system.",
}
|
|
|
|
// reservedSubstrings lists strings that isReserved rejects when they appear
// anywhere inside a handle, not only at the start.
var reservedSubstrings = []string{"sojorn"}
|
|
|
|
var reservedSet map[string]bool
|
|
|
|
func init() {
|
|
reservedSet = make(map[string]bool)
|
|
for _, lists := range [][]string{
|
|
platformReserved,
|
|
techCompanyReserved,
|
|
brandReserved,
|
|
publicFigureReserved,
|
|
} {
|
|
for _, name := range lists {
|
|
reservedSet[name] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
// -------------------------------------------------------------------
|
|
// Inappropriate content filter
|
|
// -------------------------------------------------------------------
|
|
|
|
func isInappropriate(text string) string {
|
|
// Remove common substitutions for bypass attempts
|
|
normalized := normalizeUsername(text)
|
|
|
|
for _, entry := range inappropriatePatterns {
|
|
if entry.regex.MatchString(normalized) || entry.regex.MatchString(text) {
|
|
return entry.reason
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// inappropriateEntry pairs a compiled pattern with the reason shown to the
// user when that pattern matches.
type inappropriateEntry struct {
	regex  *regexp.Regexp // compiled, case-insensitive pattern
	reason string         // user-facing explanation appended to the rejection message
}

// inappropriatePatterns is the ordered filter table, built once by init.
// Order matters: the first matching entry decides the reported reason.
var inappropriatePatterns []inappropriateEntry

// init compiles the filter table. Every pattern is compiled with the (?i)
// flag, and an invalid pattern panics at start-up via regexp.MustCompile.
//
// NOTE(review): Go's regexp treats [0-9A-Za-z_] as word characters, so \b does
// not fire next to an underscore or a digit. For example, "real_john" is not
// matched by `\breal_?\b`, and `\bofficial_\b` only matches when "official_" is
// followed by a non-word character or the end of the text. Confirm this is the
// intended strictness before changing any pattern.
func init() {
	const (
		racialSlur      = "contains a racial slur"
		homophobicSlur  = "contains a homophobic slur"
		antisemiticSlur = "contains an antisemitic slur"
		transphobicSlur = "contains a transphobic slur"
		ableistSlur     = "contains an ableist slur"
		explicit        = "contains sexually explicit content"
		threatening     = "contains threatening language"
		violent         = "contains violent content"
		terrorism       = "contains references to terrorism"
		drugs           = "contains drug references"
		impersonation   = "may imply impersonation"
		fraud           = "suggests fraudulent activity"
		selfHarm        = "contains references to self-harm"
		profanity       = "contains strong profanity"
	)

	specs := [...]struct {
		pattern string
		reason  string
	}{
		// Slurs and hate speech
		{`\bn[i1!|]gg[e3a@][r]?\b`, racialSlur},
		{`\bf[a@]gg?[o0][t]?\b`, homophobicSlur},
		{`\bk[i1!]ke\b`, antisemiticSlur},
		{`\bsp[i1!]c\b`, racialSlur},
		{`\bch[i1!]nk\b`, racialSlur},
		{`\bw[e3]tb[a@]ck\b`, racialSlur},
		{`\bcoon\b`, racialSlur},
		{`\btr[a@]nn[yie]\b`, transphobicSlur},
		{`\bdyke\b`, homophobicSlur},
		{`\bretard(ed)?\b`, ableistSlur},

		// Sexually explicit
		{`\bp[o0]rn`, explicit},
		{`\bx{2,}`, explicit},
		{`\bhentai\b`, explicit},
		{`\bcum(sl[u]t|dump|bucket)\b`, explicit},
		{`\bpussy\b`, explicit},
		{`\bd[i1!]ck(head|face|sucker)`, explicit},
		{`\bc[o0]ck(sucker)?`, explicit},

		// Violent / threatening
		{`\bk[i1!]ll(er)?_(yo)?u`, threatening},
		{`\bschool.?shoot`, violent},
		{`\bmass.?murder`, violent},
		{`\bgenocide\b`, violent},
		{`\bterroris[tm]`, terrorism},
		{`\bisis\b`, terrorism},
		{`\bal.?qaeda\b`, terrorism},
		{`\bjihad(i|ist)?\b`, terrorism},

		// Drugs (hard)
		{`\bmeth(head|lab)\b`, drugs},
		{`\bcrackhead\b`, drugs},
		{`\bheroin(e)?\b`, drugs},
		{`\bfentanyl\b`, drugs},

		// Impersonation indicators
		{`\breal_?\b`, impersonation},
		{`\bthe_?real\b`, impersonation},
		{`\bofficial_\b`, impersonation},
		{`\bnot_?fake\b`, impersonation},

		// Scam / fraud
		{`\bfree.?money\b`, fraud},
		{`\bcrypto.?scam\b`, fraud},
		{`\bget.?rich\b`, fraud},

		// Self-harm
		{`\bsu[i1!]c[i1!]de\b`, selfHarm},
		{`\bkill.?myself\b`, selfHarm},
		{`\bcut.?myself\b`, selfHarm},

		// General profanity as usernames (strong)
		{`\bfuck`, profanity},
		{`\bsh[i1!]t(head|face|stain)`, profanity},
		{`\bass(hole|wipe|face|hat)`, profanity},
		{`\bbitch\b`, profanity},
		{`\bwhore\b`, profanity},
		{`\bslut\b`, profanity},
		{`\bcunt\b`, profanity},
	}

	inappropriatePatterns = make([]inappropriateEntry, len(specs))
	for i, spec := range specs {
		inappropriatePatterns[i] = inappropriateEntry{
			regex:  regexp.MustCompile("(?i)" + spec.pattern),
			reason: spec.reason,
		}
	}
}
|
|
|
|
// leetReplacer maps common leet-speak digits and symbols to the letters they
// stand in for. It is built once at package initialisation instead of on every
// call; a strings.Replacer is safe for concurrent use.
var leetReplacer = strings.NewReplacer(
	"0", "o",
	"1", "i",
	"3", "e",
	"4", "a",
	"5", "s",
	"7", "t",
	"@", "a",
	"$", "s",
	"!", "i",
	"|", "l",
)

// normalizeUsername applies common leet-speak substitutions to catch bypass
// attempts. Characters without a substitution are returned unchanged, and the
// input's letter case is preserved.
func normalizeUsername(s string) string {
	return leetReplacer.Replace(s)
}
|