/**
 * Content Filtering with OpenAI Moderation API
 *
 * Philosophy:
 * 1. Block slurs immediately (zero tolerance)
 * 2. Send to OpenAI Moderation API for additional checking
 * 3. Everything else is allowed
 */

export type ToneLabel = 'positive' | 'neutral' | 'mixed' | 'negative' | 'hostile' | 'hate';

export interface ToneAnalysis {
  tone: ToneLabel;
  cis: number;          // content integrity score (0-1)
  flags: string[];      // detected patterns
  shouldReject: boolean;
  rejectReason?: string;
}
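
// Illustrative rejection value (hypothetical):
//   { tone: 'hate', cis: 0, flags: ['<matched term>'],
//     shouldReject: true, rejectReason: 'This content contains slurs which are not allowed.' }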

// Slurs - zero tolerance (block immediately)
const SLURS = [
  // Racial slurs
  'nigger', 'nigga', 'negro', 'chink', 'gook', 'spic', 'wetback', 'raghead',
  'sandnigger', 'coon', 'darkie', 'jap', 'zipperhead', 'mex',
  // Homophobic slurs
  'faggot', 'fag', 'fags', 'dyke', 'tranny', 'trannie', 'homo', 'lez', 'lesbo', 'queer',
  // Other
  'kike', 'spook', 'simian', 'groids', 'currymuncher', 'paki', 'cunt',
];

const OPENAI_MODERATION_URL = 'https://api.openai.com/v1/moderations';
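
/**
 * Minimal shape of the fields this module reads from the OpenAI
 * Moderation API response (a sketch covering only what is consumed
 * below; the real payload also carries `category_scores`, `flagged`,
 * `id`, and `model`).
 */
interface ModerationResponse {
  results: Array<{
    categories: Record<string, boolean>;
  }>;
}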

/**
 * Analyze text - first check slurs, then send to OpenAI Moderation API
 */
export async function analyzeTone(text: string): Promise<ToneAnalysis> {
  const flags: string[] = [];
  const lowerText = text.toLowerCase();

  // Check for slurs (zero tolerance - block immediately).
  // Whole-word matching avoids false positives such as 'spic' in 'spicy'
  // or 'coon' in 'raccoon'.
  const foundSlurs = SLURS.filter(slur => new RegExp(`\\b${slur}\\b`).test(lowerText));
  if (foundSlurs.length > 0) {
    return {
      tone: 'hate',
      cis: 0.0,
      flags: foundSlurs,
      shouldReject: true,
      rejectReason: 'This content contains slurs which are not allowed.',
    };
  }

  // Send to OpenAI Moderation API for additional checking
  const openAiKey = Deno.env.get('OPEN_AI');
  console.log('OPEN_AI key exists:', !!openAiKey);

  if (openAiKey) {
    try {
      // Log only the length so raw user text stays out of the logs
      console.log('Sending to OpenAI Moderation API, chars:', text.length);

      const response = await fetch(OPENAI_MODERATION_URL, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${openAiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ model: 'omni-moderation-latest', input: text }),
      });

      if (response.ok) {
        const data: ModerationResponse = await response.json();
        // Guard against a missing or empty results array before indexing
        const result = data.results?.[0];

        // Check the flagged categories (using OpenAI's category names)
        const categories = result?.categories ?? {};

        if (categories['hate'] || categories['hate/threatening']) {
          return {
            tone: 'hate',
            cis: 0.0,
            flags: ['openai_hate'],
            shouldReject: true,
            rejectReason: 'This content was flagged by moderation.',
          };
        }

        if (categories['harassment'] || categories['harassment/threatening']) {
          return {
            tone: 'hostile',
            cis: 0.1,
            flags: ['openai_harassment'],
            shouldReject: true,
            rejectReason: 'This content contains harassment.',
          };
        }

        if (categories['sexual'] || categories['sexual/minors']) {
          return {
            tone: 'hostile',
            cis: 0.1,
            flags: ['openai_sexual'],
            shouldReject: true,
            rejectReason: 'This content is not appropriate.',
          };
        }

        if (categories['violence'] || categories['violence/graphic']) {
          return {
            tone: 'hostile',
            cis: 0.1,
            flags: ['openai_violence'],
            shouldReject: true,
            rejectReason: 'This content contains violence.',
          };
        }

        if (categories['self-harm'] || categories['self-harm/intent'] || categories['self-harm/instructions']) {
          return {
            tone: 'hostile',
            cis: 0.1,
            flags: ['openai_self_harm'],
            shouldReject: true,
            rejectReason: 'This content contains self-harm references.',
          };
        }
      }
    } catch (e) {
      console.error('OpenAI moderation error:', e);
      // Continue with basic analysis if moderation API fails
    }
  }

  // Determine tone based on basic keyword sentiment.
  // The profanity list uses full word boundaries to avoid false positives
  // such as 'ass' in 'class' or 'hell' in 'shell' (at the cost of missing
  // some inflected forms); the sentiment lists anchor only the word start
  // so stems still match (e.g. 'thank' also catches 'thanks').
  const hasProfanity = /\b(fuck|shit|damn|ass|bitch|dick|cock|pussy|cunt|hell|bastard)\b/i.test(text);
  const isPositive = /\b(love|thank|grateful|appreciate|happy|joy|peace|calm|beautiful|wonderful|amazing|great)/i.test(text);
  const isNegative = /\b(hate|angry|furious|enraged|upset|sad|depressed|hopeless|worthless|terrible)/i.test(text);

  let tone: ToneLabel;
  let cis: number;

  if (isPositive && !isNegative) {
    tone = 'positive';
    cis = 0.9;
  } else if (isNegative && !isPositive) {
    tone = 'negative';
    cis = 0.5;
    flags.push('negative_tone');
  } else if (hasProfanity) {
    tone = 'neutral';
    cis = 0.7;
    flags.push('profanity');
  } else {
    tone = 'neutral';
    cis = 0.8;
  }

  return { tone, cis, flags, shouldReject: false };
}
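
/**
 * Usage sketch (illustrative): without the OPEN_AI env var set,
 * analyzeTone falls straight through to the keyword heuristics above.
 *
 *   const analysis = await analyzeTone('I appreciate this community');
 *   // => { tone: 'positive', cis: 0.9, flags: [], shouldReject: false }
 */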

/**
 * Generate user-facing feedback for rejected content
 */
export function getRewriteSuggestion(analysis: ToneAnalysis): string {
  if (analysis.tone === 'hate') {
    return 'Slurs are not allowed on sojorn.';
  }
  if (analysis.tone === 'hostile') {
    return 'Sharp speech does not travel here. Consider softening your words.';
  }
  if (analysis.tone === 'negative') {
    return 'This reads as negative. If you want it to reach others, try reframing.';
  }
  return 'Consider adjusting your tone for better engagement.';
}
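
/**
 * Illustrative wiring (a hypothetical helper, shown only as an example):
 * how a caller might combine the two exports to gate a submission and
 * surface feedback to the user.
 */
export async function moderatePostExample(body: string): Promise<{ ok: boolean; message?: string }> {
  const analysis = await analyzeTone(body);
  if (analysis.shouldReject) {
    // Rejected: return the user-facing rewrite suggestion
    return { ok: false, message: getRewriteSuggestion(analysis) };
  }
  return { ok: true };
}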