/**
 * Content Filtering with OpenAI Moderation API
 *
 * Philosophy:
 * 1. Block slurs immediately (zero tolerance)
 * 2. Send to OpenAI Moderation API for additional checking
 * 3. Everything else is allowed
 */

export type ToneLabel = 'positive' | 'neutral' | 'mixed' | 'negative' | 'hostile' | 'hate';

export interface ToneAnalysis {
  tone: ToneLabel;
  cis: number; // content integrity score (0-1)
  flags: string[]; // detected patterns
  shouldReject: boolean;
  rejectReason?: string;
}

// Slurs - zero tolerance (block immediately)
const SLURS = [
  // Racial slurs
  'nigger', 'nigga', 'negro', 'chink', 'gook', 'spic', 'wetback', 'raghead',
  'sandnigger', 'coon', 'darkie', 'jap', 'zipperhead', 'mex',
  // Homophobic slurs
  'faggot', 'fag', 'fags', 'dyke', 'tranny', 'trannie', 'homo', 'lez', 'lesbo', 'queer',
  // Other
  'kike', 'spook', 'simian', 'groids', 'currymuncher', 'paki', 'cunt',
];

const OPENAI_MODERATION_URL = 'https://api.openai.com/v1/moderations';

/** Escapes regex metacharacters so a list entry is always matched literally. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

// One precompiled matcher per slur. Entries must stand alone as a word (an
// optional plural "s" is allowed): plain substring matching rejected innocent
// text such as "Japan", "Mexico", "spices" or "raccoon". Patterns assume the
// input has already been lower-cased. Digits and punctuation count as
// boundaries, so "fag123" is still caught.
const SLUR_MATCHERS = SLURS.map((slur) => ({
  slur,
  pattern: new RegExp(`(?:^|[^a-z])${escapeRegExp(slur)}s?(?![a-z])`),
}));

/** Maps a group of OpenAI moderation categories to the verdict returned for it. */
interface ModerationRule {
  categories: readonly string[];
  tone: ToneLabel;
  cis: number;
  flag: string;
  rejectReason: string;
}

// Evaluated in order; the first rule with a flagged category wins.
const MODERATION_RULES: readonly ModerationRule[] = [
  {
    categories: ['hate', 'hate/threatening'],
    tone: 'hate',
    cis: 0.0,
    flag: 'openai_hate',
    rejectReason: 'This content was flagged by moderation.',
  },
  {
    categories: ['harassment', 'harassment/threatening'],
    tone: 'hostile',
    cis: 0.1,
    flag: 'openai_harassment',
    rejectReason: 'This content contains harassment.',
  },
  {
    categories: ['sexual', 'sexual/minors'],
    tone: 'hostile',
    cis: 0.1,
    flag: 'openai_sexual',
    rejectReason: 'This content is not appropriate.',
  },
  {
    categories: ['violence', 'violence/graphic'],
    tone: 'hostile',
    cis: 0.1,
    flag: 'openai_violence',
    rejectReason: 'This content contains violence.',
  },
  {
    categories: ['self-harm', 'self-harm/intent', 'self-harm/instructions'],
    tone: 'hostile',
    cis: 0.1,
    flag: 'openai_self_harm',
    rejectReason: 'This content contains self-harm references.',
  },
];

/** The subset of the moderation response body that this module reads. */
interface ModerationResponse {
  results?: Array<{ categories?: Record<string, boolean> }>;
}

// Stems with no realistic innocent superstring: match anywhere, so compounds
// and inflections ("motherfucker", "bullshit", "shitty") are still caught.
const STRONG_PROFANITY = /fuck|shit|bitch/i;
// Short words that occur inside ordinary words ("hello", "class", "cocktail"):
// these must stand alone, with a few common inflections allowed.
const WORD_PROFANITY =
  /\b(?:(?:god)?damn(?:ed|it)?|ass(?:es|holes?)?|dicks?|cocks?|puss(?:y|ies)|cunts?|hell|bastards?)\b/i;
// Sentiment stems are anchored at the start of a word only, so inflections
// ("thanks", "hateful") match but mid-word hits ("whatever", "unhappy") do not.
const POSITIVE_WORDS =
  /\b(?:love|thank|grateful|appreciate|happy|joy|peace|calm|beautiful|wonderful|amazing|great)/i;
const NEGATIVE_WORDS =
  /\b(?:hate|angry|furious|enraged|upset|sad|depressed|hopeless|worthless|terrible)/i;

/**
 * Reads the OpenAI API key from the `OPEN_AI` environment variable.
 *
 * Returns `undefined` when the Deno runtime is not present, the variable is
 * unset or empty, or reading it throws, so that a configuration problem
 * degrades to local-only analysis instead of failing the whole call.
 */
function readOpenAiKey(): string | undefined {
  const runtime = globalThis as {
    Deno?: { env?: { get(name: string): string | undefined } };
  };
  try {
    return runtime.Deno?.env?.get('OPEN_AI') || undefined;
  } catch (e) {
    console.error('Unable to read OPEN_AI from environment:', e);
    return undefined;
  }
}

/**
 * Asks the OpenAI Moderation API about `text`.
 *
 * @returns a rejecting analysis when a category covered by MODERATION_RULES is
 *   flagged; `undefined` when nothing is flagged or when the check could not
 *   be performed (no key, network error, non-OK status, unexpected body).
 *   Failures are logged and deliberately do not block the content.
 */
async function checkWithOpenAi(text: string): Promise<ToneAnalysis | undefined> {
  if (text.trim() === '') {
    return undefined; // nothing to moderate
  }

  const openAiKey = readOpenAiKey();
  if (!openAiKey) {
    console.warn('OPEN_AI key not available; skipping OpenAI moderation.');
    return undefined;
  }

  try {
    // The submitted text is intentionally not logged: it is user content.
    const response = await fetch(OPENAI_MODERATION_URL, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${openAiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ model: 'omni-moderation-latest', input: text }),
    });

    if (!response.ok) {
      console.error('OpenAI moderation request failed with status:', response.status);
      // Release the unread body so the connection is not left dangling.
      await response.body?.cancel();
      return undefined;
    }

    const data: ModerationResponse = await response.json();
    const categories = data.results?.[0]?.categories;
    if (!categories) {
      console.error('OpenAI moderation response did not contain categories.');
      return undefined;
    }

    const rule = MODERATION_RULES.find((candidate) =>
      candidate.categories.some((name) => categories[name] === true)
    );
    if (rule) {
      return {
        tone: rule.tone,
        cis: rule.cis,
        flags: [rule.flag],
        shouldReject: true,
        rejectReason: rule.rejectReason,
      };
    }
  } catch (e) {
    // Continue with basic analysis if the moderation API fails.
    console.error('OpenAI moderation error:', e);
  }
  return undefined;
}

/** Keyword-based tone estimate; never rejects. */
function basicToneAnalysis(text: string): ToneAnalysis {
  const hasProfanity = STRONG_PROFANITY.test(text) || WORD_PROFANITY.test(text);
  const isPositive = POSITIVE_WORDS.test(text);
  const isNegative = NEGATIVE_WORDS.test(text);

  if (isPositive && !isNegative) {
    return { tone: 'positive', cis: 0.9, flags: [], shouldReject: false };
  }
  if (isNegative && !isPositive) {
    return { tone: 'negative', cis: 0.5, flags: ['negative_tone'], shouldReject: false };
  }
  if (hasProfanity) {
    return { tone: 'neutral', cis: 0.7, flags: ['profanity'], shouldReject: false };
  }
  return { tone: 'neutral', cis: 0.8, flags: [], shouldReject: false };
}

/**
 * Analyze text - first check slurs, then send to OpenAI Moderation API, and
 * finally fall back to a keyword-based tone estimate.
 *
 * @param text - user-submitted content
 * @returns the analysis; `shouldReject` is true only for a slur hit or a
 *   flagged moderation category. Moderation API failures never reject.
 */
export async function analyzeTone(text: string): Promise<ToneAnalysis> {
  const lowerText = text.toLowerCase();

  // Check for slurs (zero tolerance - block immediately)
  const foundSlurs = SLUR_MATCHERS
    .filter(({ pattern }) => pattern.test(lowerText))
    .map(({ slur }) => slur);
  if (foundSlurs.length > 0) {
    return {
      tone: 'hate',
      cis: 0.0,
      flags: foundSlurs,
      shouldReject: true,
      rejectReason: 'This content contains slurs which are not allowed.',
    };
  }

  const moderationVerdict = await checkWithOpenAi(text);
  if (moderationVerdict) {
    return moderationVerdict;
  }

  return basicToneAnalysis(text);
}

/**
 * Generate user-facing feedback for rejected content
 *
 * @param analysis - result previously produced by analyzeTone
 * @returns a short message chosen by `analysis.tone`
 */
export function getRewriteSuggestion(analysis: ToneAnalysis): string {
  switch (analysis.tone) {
    case 'hate':
      return 'Slurs are not allowed on sojorn.';
    case 'hostile':
      return 'Sharp speech does not travel here. Consider softening your words.';
    case 'negative':
      return 'This reads as negative. If you want it to reach others, try reframing.';
    default:
      return 'Consider adjusting your tone for better engagement.';
  }
}