feat: AI moderation audit log with admin feedback for training - DB migration, service methods, admin endpoints

2026-02-07 16:35:50 -06:00 · 2026-02-07 16:35:50 -06:00 · 1f0461b4f4
parent c83317c29c
commit 1f0461b4f4
4 changed files with 296 additions and 0 deletions
--- a/go-backend/cmd/api/main.go
+++ b/go-backend/cmd/api/main.go
@ -464,6 +464,11 @@ func main() {
 		admin.GET("/ai/config", adminHandler.GetAIModerationConfigs)
 		admin.PUT("/ai/config", adminHandler.SetAIModerationConfig)
 		admin.POST("/ai/test", adminHandler.TestAIModeration)
+
+		// AI Moderation Audit Log
+		admin.GET("/ai/moderation-log", adminHandler.GetAIModerationLog)
+		admin.POST("/ai/moderation-log/:id/feedback", adminHandler.SubmitAIModerationFeedback)
+		admin.GET("/ai/training-data", adminHandler.ExportAITrainingData)
 	}

 	// Public claim request endpoint (no auth)
--- a/go-backend/internal/handlers/admin_handler.go
+++ b/go-backend/internal/handlers/admin_handler.go
@ -2455,3 +2455,81 @@ func (h *AdminHandler) TestAIModeration(c *gin.Context) {

 	c.JSON(http.StatusOK, gin.H{"result": result})
 }
+
+// ──────────────────────────────────────────────
+// AI Moderation Audit Log
+// ──────────────────────────────────────────────
+
+// GetAIModerationLog returns one page of the AI moderation audit log.
+//
+// Query parameters:
+//   - limit: page size; falls back to 50 when absent, non-numeric, or negative.
+//   - offset: rows to skip; falls back to 0 when absent, non-numeric, or negative.
+//   - decision, content_type, search, feedback: optional filters handed to the
+//     moderation service unchanged.
+//
+// Responds with 200 and {items, total, limit, offset}, where limit and offset
+// echo the values actually used, or with 500 when the service call fails.
+func (h *AdminHandler) GetAIModerationLog(c *gin.Context) {
+	ctx := c.Request.Context()
+
+	// Atoi yields 0 alongside an error for malformed input, and negative numbers
+	// are not meaningful page bounds, so both cases fall back to the defaults
+	// instead of being forwarded to the query.
+	limit, err := strconv.Atoi(c.DefaultQuery("limit", "50"))
+	if err != nil || limit < 0 {
+		limit = 50
+	}
+	offset, err := strconv.Atoi(c.DefaultQuery("offset", "0"))
+	if err != nil || offset < 0 {
+		offset = 0
+	}
+
+	decision := c.Query("decision")
+	contentType := c.Query("content_type")
+	search := c.Query("search")
+	feedbackFilter := c.Query("feedback")
+
+	items, total, err := h.moderationService.GetAIModerationLog(ctx, limit, offset, decision, contentType, search, feedbackFilter)
+	if err != nil {
+		log.Error().Err(err).Msg("Failed to fetch AI moderation log")
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to fetch AI moderation log"})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"items":  items,
+		"total":  total,
+		"limit":  limit,
+		"offset": offset,
+	})
+}
+
+// SubmitAIModerationFeedback records an admin's verdict on one AI moderation
+// log entry, identified by the "id" path parameter.
+//
+// The JSON body carries "correct" (bool) and a required "reason" string.
+// Responds with 400 for a malformed body or log ID, 401 when the request
+// context holds no usable "user_id", 500 when the service call fails, and 200
+// on success. After a successful update an audit_log row is written on a
+// best-effort basis: a failure there is logged but does not fail the request.
+func (h *AdminHandler) SubmitAIModerationFeedback(c *gin.Context) {
+	ctx := c.Request.Context()
+	logID := c.Param("id")
+
+	var req struct {
+		Correct bool   `json:"correct"`
+		Reason  string `json:"reason" binding:"required"`
+	}
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+
+	logUUID, err := uuid.Parse(logID)
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid log ID"})
+		return
+	}
+
+	// Use a checked assertion: a missing or non-string "user_id" must produce
+	// an error response rather than a panic, and an unparsable ID must not be
+	// recorded as the zero UUID.
+	adminID, _ := c.Get("user_id")
+	adminIDStr, ok := adminID.(string)
+	if !ok {
+		c.JSON(http.StatusUnauthorized, gin.H{"error": "Unauthorized"})
+		return
+	}
+	adminUUID, err := uuid.Parse(adminIDStr)
+	if err != nil {
+		c.JSON(http.StatusUnauthorized, gin.H{"error": "Unauthorized"})
+		return
+	}
+
+	if err := h.moderationService.SubmitAIFeedback(ctx, logUUID, req.Correct, req.Reason, adminUUID); err != nil {
+		log.Error().Err(err).Msg("Failed to submit AI feedback")
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to submit feedback"})
+		return
+	}
+
+	// Audit log. The details document is assembled by the database from bound
+	// parameters, so quotes, backslashes, or newlines in the admin-supplied
+	// reason cannot corrupt it or inject extra keys.
+	if _, err := h.pool.Exec(ctx, `INSERT INTO audit_log (actor_id, action, target_type, target_id, details) VALUES ($1, 'ai_moderation_feedback', 'ai_moderation_log', $2, json_build_object('correct', $3::boolean, 'reason', $4::text))`,
+		adminUUID, logID, req.Correct, req.Reason); err != nil {
+		// The feedback itself is already stored; only report the audit failure.
+		log.Error().Err(err).Msg("Failed to write audit log for AI moderation feedback")
+	}
+
+	c.JSON(http.StatusOK, gin.H{"message": "Feedback submitted"})
+}
+
+// ExportAITrainingData responds with the rows returned by the moderation
+// service's GetAITrainingData together with their count. It answers 500 when
+// the service call fails and 200 otherwise.
+func (h *AdminHandler) ExportAITrainingData(c *gin.Context) {
+	trainingData, exportErr := h.moderationService.GetAITrainingData(c.Request.Context())
+	if exportErr != nil {
+		log.Error().Err(exportErr).Msg("Failed to export AI training data")
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to export training data"})
+		return
+	}
+
+	payload := gin.H{
+		"training_data": trainingData,
+		"count":         len(trainingData),
+	}
+	c.JSON(http.StatusOK, payload)
+}
--- a/go-backend/internal/handlers/post_handler.go
+++ b/go-backend/internal/handlers/post_handler.go
@ -131,6 +131,17 @@ func (h *PostHandler) CreateComment(c *gin.Context) {
 		_ = h.moderationService.FlagComment(c.Request.Context(), post.ID, scores, reason)
 	}

+	// Log AI moderation decision for comment
+	if h.moderationService != nil {
+		decision := "pass"
+		if post.Status == "pending_moderation" {
+			decision = "flag"
+		}
+		invCis := 1.0 - cis
+		scores := &services.ThreePoisonsScore{Hate: invCis, Greed: 0, Delusion: 0}
+		h.moderationService.LogAIDecision(c.Request.Context(), "comment", post.ID, userID, req.Body, scores, nil, decision, tone, "", nil)
+	}
+
 	// Get post details for notification
 	rootPost, err := h.postRepo.GetPostByID(c.Request.Context(), postID, userIDStr.(string))
 	if err == nil && rootPost.AuthorID.String() != userIDStr.(string) {
@ -373,6 +384,28 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 		_ = h.moderationService.FlagPost(c.Request.Context(), post.ID, scores, reason)
 	}

+	// Log AI moderation decision to audit log
+	if h.moderationService != nil {
+		decision := "pass"
+		flagReason := ""
+		if post.ToneLabel != nil && *post.ToneLabel != "" {
+			flagReason = *post.ToneLabel
+		}
+		if post.Status == "pending_moderation" {
+			decision = "flag"
+		} else if post.IsNSFW {
+			decision = "nsfw"
+		}
+		var scores *services.ThreePoisonsScore
+		if post.CISScore != nil {
+			invCis := 1.0 - *post.CISScore
+			scores = &services.ThreePoisonsScore{Hate: invCis, Greed: 0, Delusion: 0}
+		} else {
+			scores = &services.ThreePoisonsScore{}
+		}
+		h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, "", nil)
+	}
+
 	// Check for @mentions and notify mentioned users
 	go func() {
 		if h.notificationService != nil && strings.Contains(req.Body, "@") {
--- a/go-backend/internal/services/moderation_service.go
+++ b/go-backend/internal/services/moderation_service.go
@ -516,6 +516,186 @@ func (s *ModerationService) UpdateUserStatus(ctx context.Context, userID uuid.UU
 	return nil
 }

+// ============================================================================
+// AI Moderation Audit Log
+// ============================================================================
+
+// LogAIDecision records an AI moderation decision to the audit log
+func (s *ModerationService) LogAIDecision(ctx context.Context, contentType string, contentID uuid.UUID, authorID uuid.UUID, contentSnippet string, scores *ThreePoisonsScore, rawScores json.RawMessage, decision string, flagReason string, orDecision string, orScores json.RawMessage) {
+	snippet := contentSnippet
+	if len(snippet) > 200 {
+		snippet = snippet[:200]
+	}
+
+	_, err := s.pool.Exec(ctx, `
+		INSERT INTO ai_moderation_log (content_type, content_id, author_id, content_snippet, decision, flag_reason, scores_hate, scores_greed, scores_delusion, raw_scores, or_decision, or_scores)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
+	`, contentType, contentID, authorID, snippet, decision, flagReason, scores.Hate, scores.Greed, scores.Delusion, rawScores, orDecision, orScores)
+	if err != nil {
+		fmt.Printf("Failed to log AI moderation decision: %v\n", err)
+	}
+}
+
+// GetAIModerationLog retrieves one page of the AI moderation audit log,
+// newest entries first.
+//
+// Filters (each is skipped when empty):
+//   - decision, contentType: exact match on the corresponding column.
+//   - search: case-insensitive substring match on the content snippet or on
+//     the author's profile handle.
+//   - feedbackFilter: "reviewed" keeps rows that have feedback, "unreviewed"
+//     keeps rows that do not; any other value applies no feedback filter.
+//
+// It returns the page as a non-nil slice of column-keyed maps plus the total
+// number of rows matching the filters regardless of limit/offset. An error is
+// returned when the count query, the page query, or row iteration fails. Rows
+// that fail to scan are printed and skipped.
+func (s *ModerationService) GetAIModerationLog(ctx context.Context, limit, offset int, decision, contentType, search string, feedbackFilter string) ([]map[string]interface{}, int, error) {
+	where := "WHERE 1=1"
+	args := []interface{}{}
+	argIdx := 1
+
+	if decision != "" {
+		where += fmt.Sprintf(" AND aml.decision = $%d", argIdx)
+		args = append(args, decision)
+		argIdx++
+	}
+	if contentType != "" {
+		where += fmt.Sprintf(" AND aml.content_type = $%d", argIdx)
+		args = append(args, contentType)
+		argIdx++
+	}
+	if search != "" {
+		// The same placeholder is referenced twice; the value is bound once.
+		where += fmt.Sprintf(" AND (aml.content_snippet ILIKE '%%' || $%d || '%%' OR pr.handle ILIKE '%%' || $%d || '%%')", argIdx, argIdx)
+		args = append(args, search)
+		argIdx++
+	}
+	switch feedbackFilter {
+	case "reviewed":
+		where += " AND aml.feedback_correct IS NOT NULL"
+	case "unreviewed":
+		where += " AND aml.feedback_correct IS NULL"
+	}
+
+	// Count total. This runs with the filter arguments only, before the paging
+	// arguments are appended; a failure is reported instead of being returned
+	// as a misleading total of 0.
+	var total int
+	countQuery := fmt.Sprintf(`SELECT COUNT(*) FROM ai_moderation_log aml LEFT JOIN profiles pr ON aml.author_id = pr.id %s`, where)
+	if err := s.pool.QueryRow(ctx, countQuery, args...).Scan(&total); err != nil {
+		return nil, 0, fmt.Errorf("failed to count ai moderation log: %w", err)
+	}
+
+	// Fetch rows
+	query := fmt.Sprintf(`
+		SELECT aml.id, aml.content_type, aml.content_id, aml.author_id, aml.content_snippet,
+		       aml.ai_provider, aml.decision, aml.flag_reason,
+		       aml.scores_hate, aml.scores_greed, aml.scores_delusion, aml.raw_scores,
+		       aml.or_decision, aml.or_scores,
+		       aml.feedback_correct, aml.feedback_reason, aml.feedback_by, aml.feedback_at,
+		       aml.created_at,
+		       COALESCE(pr.handle, '') as author_handle,
+		       COALESCE(pr.display_name, '') as author_display_name
+		FROM ai_moderation_log aml
+		LEFT JOIN profiles pr ON aml.author_id = pr.id
+		%s
+		ORDER BY aml.created_at DESC
+		LIMIT $%d OFFSET $%d
+	`, where, argIdx, argIdx+1)
+	args = append(args, limit, offset)
+
+	rows, err := s.pool.Query(ctx, query, args...)
+	if err != nil {
+		return nil, 0, fmt.Errorf("failed to query ai moderation log: %w", err)
+	}
+	defer rows.Close()
+
+	// Start from an empty, non-nil slice so an empty page is encoded as a JSON
+	// array rather than null.
+	items := make([]map[string]interface{}, 0)
+	for rows.Next() {
+		var id, contentID, authorID uuid.UUID
+		var cType, snippet, aiProvider, dec string
+		var flagReason, orDecision, feedbackReason *string
+		var feedbackBy *uuid.UUID
+		var feedbackCorrect *bool
+		var feedbackAt *time.Time
+		var scoresHate, scoresGreed, scoresDelusion float64
+		var rawScores, orScores []byte
+		var createdAt time.Time
+		var authorHandle, authorDisplayName string
+
+		if err := rows.Scan(&id, &cType, &contentID, &authorID, &snippet,
+			&aiProvider, &dec, &flagReason,
+			&scoresHate, &scoresGreed, &scoresDelusion, &rawScores,
+			&orDecision, &orScores,
+			&feedbackCorrect, &feedbackReason, &feedbackBy, &feedbackAt,
+			&createdAt,
+			&authorHandle, &authorDisplayName,
+		); err != nil {
+			// Best-effort: one unreadable row should not hide the rest of the page.
+			fmt.Printf("Failed to scan ai moderation log row: %v\n", err)
+			continue
+		}
+
+		item := map[string]interface{}{
+			"id":                  id,
+			"content_type":        cType,
+			"content_id":          contentID,
+			"author_id":           authorID,
+			"content_snippet":     snippet,
+			"ai_provider":         aiProvider,
+			"decision":            dec,
+			"flag_reason":         flagReason,
+			"scores_hate":         scoresHate,
+			"scores_greed":        scoresGreed,
+			"scores_delusion":     scoresDelusion,
+			"raw_scores":          json.RawMessage(rawScores),
+			"or_decision":         orDecision,
+			"or_scores":           json.RawMessage(orScores),
+			"feedback_correct":    feedbackCorrect,
+			"feedback_reason":     feedbackReason,
+			"feedback_by":         feedbackBy,
+			"feedback_at":         feedbackAt,
+			"created_at":          createdAt,
+			"author_handle":       authorHandle,
+			"author_display_name": authorDisplayName,
+		}
+		items = append(items, item)
+	}
+	// Next returning false can mean the rows ran out or that iteration failed;
+	// Err distinguishes the two.
+	if err := rows.Err(); err != nil {
+		return nil, 0, fmt.Errorf("failed to read ai moderation log rows: %w", err)
+	}
+
+	return items, total, nil
+}
+
+// SubmitAIFeedback records admin training feedback on an AI moderation decision.
+//
+// It sets feedback_correct, feedback_reason, feedback_by and feedback_at
+// (to the database's NOW()) on the ai_moderation_log row whose id is logID,
+// overwriting any feedback already stored there. An error is returned when the
+// update fails or when no row with that id exists.
+func (s *ModerationService) SubmitAIFeedback(ctx context.Context, logID uuid.UUID, correct bool, reason string, adminID uuid.UUID) error {
+	tag, err := s.pool.Exec(ctx, `
+		UPDATE ai_moderation_log 
+		SET feedback_correct = $1, feedback_reason = $2, feedback_by = $3, feedback_at = NOW()
+		WHERE id = $4
+	`, correct, reason, adminID, logID)
+	if err != nil {
+		return fmt.Errorf("failed to submit AI feedback: %w", err)
+	}
+	// An UPDATE that matches nothing is not a database error, so an unknown
+	// log ID has to be detected from the affected-row count.
+	if tag.RowsAffected() == 0 {
+		return fmt.Errorf("failed to submit AI feedback: ai moderation log %s not found", logID)
+	}
+	return nil
+}
+
+// GetAITrainingData exports all reviewed feedback entries for fine-tuning.
+//
+// It selects every ai_moderation_log row whose feedback_correct is set,
+// ordered by feedback_at descending, and returns them as a non-nil slice of
+// maps with the keys content, ai_decision, ai_flag_reason, scores (hate, greed,
+// delusion), correct and feedback_reason. An error is returned when the query
+// or row iteration fails. Rows that fail to scan are printed and skipped.
+func (s *ModerationService) GetAITrainingData(ctx context.Context) ([]map[string]interface{}, error) {
+	rows, err := s.pool.Query(ctx, `
+		SELECT content_snippet, decision, flag_reason, scores_hate, scores_greed, scores_delusion,
+		       feedback_correct, feedback_reason
+		FROM ai_moderation_log
+		WHERE feedback_correct IS NOT NULL
+		ORDER BY feedback_at DESC
+	`)
+	if err != nil {
+		return nil, fmt.Errorf("failed to query ai training data: %w", err)
+	}
+	defer rows.Close()
+
+	// Start from an empty, non-nil slice so an empty export is encoded as a
+	// JSON array rather than null.
+	items := make([]map[string]interface{}, 0)
+	for rows.Next() {
+		var snippet, decision string
+		var flagReason, feedbackReason *string
+		var hate, greed, delusion float64
+		var correct bool
+
+		if err := rows.Scan(&snippet, &decision, &flagReason, &hate, &greed, &delusion, &correct, &feedbackReason); err != nil {
+			// Best-effort: skip the row, but leave a trace so gaps in the export
+			// can be explained.
+			fmt.Printf("Failed to scan ai training data row: %v\n", err)
+			continue
+		}
+		items = append(items, map[string]interface{}{
+			"content":         snippet,
+			"ai_decision":     decision,
+			"ai_flag_reason":  flagReason,
+			"scores":          map[string]float64{"hate": hate, "greed": greed, "delusion": delusion},
+			"correct":         correct,
+			"feedback_reason": feedbackReason,
+		})
+	}
+	// Next returning false can mean the rows ran out or that iteration failed;
+	// Err distinguishes the two.
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("failed to read ai training data rows: %w", err)
+	}
+	return items, nil
+}
+
 func containsAny(body string, terms []string) bool {
 	// Case insensitive check
 	lower := bytes.ToLower([]byte(body))