feat: AI moderation audit log with admin feedback for training - DB migration, service methods, admin endpoints

2026-02-07 16:35:50 -06:00 · 2026-02-07 16:35:50 -06:00 · 1f0461b4f4
parent c83317c29c
commit 1f0461b4f4
4 changed files with 296 additions and 0 deletions
--- a/go-backend/cmd/api/main.go
+++ b/go-backend/cmd/api/main.go
@ -464,6 +464,11 @@ func main() {
 		admin.GET("/ai/config", adminHandler.GetAIModerationConfigs)
 		admin.PUT("/ai/config", adminHandler.SetAIModerationConfig)
 		admin.POST("/ai/test", adminHandler.TestAIModeration)
 		// AI Moderation Audit Log
 		admin.GET("/ai/moderation-log", adminHandler.GetAIModerationLog)
 		admin.POST("/ai/moderation-log/:id/feedback", adminHandler.SubmitAIModerationFeedback)
 		admin.GET("/ai/training-data", adminHandler.ExportAITrainingData)
 	}
 	// Public claim request endpoint (no auth)
--- a/go-backend/internal/handlers/admin_handler.go
+++ b/go-backend/internal/handlers/admin_handler.go
@ -2455,3 +2455,81 @@ func (h *AdminHandler) TestAIModeration(c *gin.Context) {
 	c.JSON(http.StatusOK, gin.H{"result": result})
 }
 // ──────────────────────────────────────────────
 // AI Moderation Audit Log
 // ──────────────────────────────────────────────
 // GetAIModerationLog returns one page of the AI moderation audit log.
 //
 // Query parameters:
 //   - limit: page size; falls back to 50 when absent, non-numeric, or < 1,
 //     and is capped at 500 so a single request cannot pull the whole table.
 //   - offset: rows to skip; falls back to 0 when absent, non-numeric, or negative.
 //   - decision, content_type, search, feedback: optional filters handed to the
 //     moderation service unchanged.
 //
 // Responds 200 with "items", "total", and the effective "limit"/"offset" that
 // were used, or 500 when the service call fails.
 func (h *AdminHandler) GetAIModerationLog(c *gin.Context) {
 	const (
 		defaultLimit = 50
 		maxLimit     = 500
 	)

 	ctx := c.Request.Context()

 	// A malformed or non-positive limit previously reached SQL as LIMIT 0 or a
 	// negative LIMIT (a database error); use the default page size instead.
 	limit, err := strconv.Atoi(c.DefaultQuery("limit", "50"))
 	if err != nil || limit < 1 {
 		limit = defaultLimit
 	}
 	if limit > maxLimit {
 		limit = maxLimit
 	}

 	offset, err := strconv.Atoi(c.DefaultQuery("offset", "0"))
 	if err != nil || offset < 0 {
 		offset = 0
 	}

 	decision := c.Query("decision")
 	contentType := c.Query("content_type")
 	search := c.Query("search")
 	feedbackFilter := c.Query("feedback")

 	items, total, err := h.moderationService.GetAIModerationLog(ctx, limit, offset, decision, contentType, search, feedbackFilter)
 	if err != nil {
 		log.Error().Err(err).Msg("Failed to fetch AI moderation log")
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to fetch AI moderation log"})
 		return
 	}

 	c.JSON(http.StatusOK, gin.H{
 		"items":  items,
 		"total":  total,
 		"limit":  limit,
 		"offset": offset,
 	})
 }
 // SubmitAIModerationFeedback stores an admin's verdict on a single AI
 // moderation log entry and records the action in audit_log.
 //
 // Path parameter "id" is the ai_moderation_log row ID (UUID). The JSON body
 // carries "correct" (bool) and a required "reason" string.
 //
 // Responses:
 //   - 400 when the body fails binding or the ID is not a UUID.
 //   - 401 when the request context holds no usable "user_id".
 //   - 500 when the moderation service rejects the update.
 //   - 200 once the feedback is stored. Writing the audit_log row is best-effort:
 //     a failure there is logged but does not fail the request.
 func (h *AdminHandler) SubmitAIModerationFeedback(c *gin.Context) {
 	ctx := c.Request.Context()

 	var req struct {
 		Correct bool   `json:"correct"`
 		Reason  string `json:"reason" binding:"required"`
 	}
 	if err := c.ShouldBindJSON(&req); err != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}

 	logUUID, err := uuid.Parse(c.Param("id"))
 	if err != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid log ID"})
 		return
 	}

 	// Checked assertion: an absent or non-string "user_id" used to panic here,
 	// and an unparsable one silently attributed the feedback to the nil UUID.
 	rawAdminID, _ := c.Get("user_id")
 	adminIDStr, ok := rawAdminID.(string)
 	if !ok {
 		c.JSON(http.StatusUnauthorized, gin.H{"error": "Invalid admin identity"})
 		return
 	}
 	adminUUID, err := uuid.Parse(adminIDStr)
 	if err != nil {
 		c.JSON(http.StatusUnauthorized, gin.H{"error": "Invalid admin identity"})
 		return
 	}

 	if err := h.moderationService.SubmitAIFeedback(ctx, logUUID, req.Correct, req.Reason, adminUUID); err != nil {
 		log.Error().Err(err).Msg("Failed to submit AI feedback")
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to submit feedback"})
 		return
 	}

 	// Audit log. The details document is assembled by PostgreSQL from bound
 	// parameters, so quotes or backslashes in the admin-supplied reason cannot
 	// produce malformed or injected JSON.
 	if _, err := h.pool.Exec(ctx, `INSERT INTO audit_log (actor_id, action, target_type, target_id, details) VALUES ($1, 'ai_moderation_feedback', 'ai_moderation_log', $2, jsonb_build_object('correct', $3::boolean, 'reason', $4::text))`,
 		adminUUID, logUUID.String(), req.Correct, req.Reason); err != nil {
 		log.Error().Err(err).Msg("Failed to write audit log for AI moderation feedback")
 	}

 	c.JSON(http.StatusOK, gin.H{"message": "Feedback submitted"})
 }
 // ExportAITrainingData responds with every entry the moderation service
 // returns from GetAITrainingData, together with the number of entries.
 //
 // Responds 200 with "training_data" and "count", or 500 when the service
 // call fails.
 func (h *AdminHandler) ExportAITrainingData(c *gin.Context) {
 	records, exportErr := h.moderationService.GetAITrainingData(c.Request.Context())
 	if exportErr != nil {
 		log.Error().Err(exportErr).Msg("Failed to export AI training data")
 		failure := gin.H{"error": "Failed to export training data"}
 		c.JSON(http.StatusInternalServerError, failure)
 		return
 	}

 	payload := gin.H{
 		"training_data": records,
 		"count":         len(records),
 	}
 	c.JSON(http.StatusOK, payload)
 }
--- a/go-backend/internal/handlers/post_handler.go
+++ b/go-backend/internal/handlers/post_handler.go
@ -131,6 +131,17 @@ func (h *PostHandler) CreateComment(c *gin.Context) {
 		_ = h.moderationService.FlagComment(c.Request.Context(), post.ID, scores, reason)
 	}
 	// Log AI moderation decision for comment
 	if h.moderationService != nil {
 		decision := "pass"
 		if post.Status == "pending_moderation" {
 			decision = "flag"
 		}
 		invCis := 1.0 - cis
 		scores := &services.ThreePoisonsScore{Hate: invCis, Greed: 0, Delusion: 0}
 		h.moderationService.LogAIDecision(c.Request.Context(), "comment", post.ID, userID, req.Body, scores, nil, decision, tone, "", nil)
 	}
 	// Get post details for notification
 	rootPost, err := h.postRepo.GetPostByID(c.Request.Context(), postID, userIDStr.(string))
 	if err == nil && rootPost.AuthorID.String() != userIDStr.(string) {
@ -373,6 +384,28 @@ func (h *PostHandler) CreatePost(c *gin.Context) {
 		_ = h.moderationService.FlagPost(c.Request.Context(), post.ID, scores, reason)
 	}
 	// Log AI moderation decision to audit log
 	if h.moderationService != nil {
 		decision := "pass"
 		flagReason := ""
 		if post.ToneLabel != nil && *post.ToneLabel != "" {
 			flagReason = *post.ToneLabel
 		}
 		if post.Status == "pending_moderation" {
 			decision = "flag"
 		} else if post.IsNSFW {
 			decision = "nsfw"
 		}
 		var scores *services.ThreePoisonsScore
 		if post.CISScore != nil {
 			invCis := 1.0 - *post.CISScore
 			scores = &services.ThreePoisonsScore{Hate: invCis, Greed: 0, Delusion: 0}
 		} else {
 			scores = &services.ThreePoisonsScore{}
 		}
 		h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, "", nil)
 	}
 	// Check for @mentions and notify mentioned users
 	go func() {
 		if h.notificationService != nil && strings.Contains(req.Body, "@") {
--- a/go-backend/internal/services/moderation_service.go
+++ b/go-backend/internal/services/moderation_service.go
@ -516,6 +516,186 @@ func (s *ModerationService) UpdateUserStatus(ctx context.Context, userID uuid.UU
 	return nil
 }
 // ============================================================================
 // AI Moderation Audit Log
 // ============================================================================
 // LogAIDecision records an AI moderation decision in the ai_moderation_log table.
 //
 // contentSnippet is stored truncated to at most 200 bytes. The cut is moved
 // back to a UTF-8 rune boundary so a multi-byte character is never split,
 // since a split character would make the stored text invalid UTF-8.
 // A nil scores is recorded as all-zero scores. rawScores, orDecision and
 // orScores are passed to the INSERT as given.
 //
 // Logging is best-effort: a failed INSERT is printed and not returned, so a
 // logging problem never blocks the caller.
 func (s *ModerationService) LogAIDecision(ctx context.Context, contentType string, contentID uuid.UUID, authorID uuid.UUID, contentSnippet string, scores *ThreePoisonsScore, rawScores json.RawMessage, decision string, flagReason string, orDecision string, orScores json.RawMessage) {
 	const maxSnippetBytes = 200

 	snippet := contentSnippet
 	if len(snippet) > maxSnippetBytes {
 		cut := maxSnippetBytes
 		// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; if the byte
 		// at the cut is one, the cut lands inside a character, so back up to
 		// the start of that character.
 		for cut > 0 && snippet[cut]&0xC0 == 0x80 {
 			cut--
 		}
 		snippet = snippet[:cut]
 	}

 	// Guard against a nil pointer so the field reads below cannot panic.
 	if scores == nil {
 		scores = &ThreePoisonsScore{}
 	}

 	_, err := s.pool.Exec(ctx, `
 		INSERT INTO ai_moderation_log (content_type, content_id, author_id, content_snippet, decision, flag_reason, scores_hate, scores_greed, scores_delusion, raw_scores, or_decision, or_scores)
 		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
 	`, contentType, contentID, authorID, snippet, decision, flagReason, scores.Hate, scores.Greed, scores.Delusion, rawScores, orDecision, orScores)
 	if err != nil {
 		fmt.Printf("Failed to log AI moderation decision: %v\n", err)
 	}
 }
 // GetAIModerationLog retrieves one page of the AI moderation audit log,
 // newest first, together with the total number of rows matching the filters.
 //
 // Filters (each is skipped when empty):
 //   - decision, contentType: exact match on the corresponding column.
 //   - search: case-insensitive substring match against the content snippet or
 //     the author's profile handle. NOTE(review): '%' and '_' inside search are
 //     not escaped and therefore act as ILIKE wildcards.
 //   - feedbackFilter: "reviewed" keeps rows that have feedback, "unreviewed"
 //     keeps rows without; any other value applies no feedback filter.
 //
 // limit and offset are bound directly to LIMIT/OFFSET; callers are expected to
 // pass non-negative values.
 //
 // The returned slice is never nil on success, so an empty page serializes as
 // a JSON array rather than null. Any failure of the count query, the page
 // query, a row scan, or row iteration is returned as an error.
 func (s *ModerationService) GetAIModerationLog(ctx context.Context, limit, offset int, decision, contentType, search string, feedbackFilter string) ([]map[string]interface{}, int, error) {
 	where := "WHERE 1=1"
 	args := []interface{}{}
 	argIdx := 1

 	if decision != "" {
 		where += fmt.Sprintf(" AND aml.decision = $%d", argIdx)
 		args = append(args, decision)
 		argIdx++
 	}
 	if contentType != "" {
 		where += fmt.Sprintf(" AND aml.content_type = $%d", argIdx)
 		args = append(args, contentType)
 		argIdx++
 	}
 	if search != "" {
 		// The same placeholder is referenced twice, so only one argument is added.
 		where += fmt.Sprintf(" AND (aml.content_snippet ILIKE '%%' || $%d || '%%' OR pr.handle ILIKE '%%' || $%d || '%%')", argIdx, argIdx)
 		args = append(args, search)
 		argIdx++
 	}
 	switch feedbackFilter {
 	case "reviewed":
 		where += " AND aml.feedback_correct IS NOT NULL"
 	case "unreviewed":
 		where += " AND aml.feedback_correct IS NULL"
 	}

 	// Count total. The count must use the filter arguments only, so it runs
 	// before LIMIT/OFFSET are appended to args.
 	var total int
 	countQuery := fmt.Sprintf(`SELECT COUNT(*) FROM ai_moderation_log aml LEFT JOIN profiles pr ON aml.author_id = pr.id %s`, where)
 	if err := s.pool.QueryRow(ctx, countQuery, args...).Scan(&total); err != nil {
 		return nil, 0, fmt.Errorf("failed to count ai moderation log: %w", err)
 	}

 	// Fetch rows. ai_provider is wrapped in COALESCE because it is scanned into
 	// a plain string and the INSERT in LogAIDecision does not set it.
 	query := fmt.Sprintf(`
 		SELECT aml.id, aml.content_type, aml.content_id, aml.author_id, aml.content_snippet,
 		       COALESCE(aml.ai_provider, '') as ai_provider, aml.decision, aml.flag_reason,
 		       aml.scores_hate, aml.scores_greed, aml.scores_delusion, aml.raw_scores,
 		       aml.or_decision, aml.or_scores,
 		       aml.feedback_correct, aml.feedback_reason, aml.feedback_by, aml.feedback_at,
 		       aml.created_at,
 		       COALESCE(pr.handle, '') as author_handle,
 		       COALESCE(pr.display_name, '') as author_display_name
 		FROM ai_moderation_log aml
 		LEFT JOIN profiles pr ON aml.author_id = pr.id
 		%s
 		ORDER BY aml.created_at DESC
 		LIMIT $%d OFFSET $%d
 	`, where, argIdx, argIdx+1)
 	args = append(args, limit, offset)

 	rows, err := s.pool.Query(ctx, query, args...)
 	if err != nil {
 		return nil, 0, fmt.Errorf("failed to query ai moderation log: %w", err)
 	}
 	defer rows.Close()

 	// Start from an empty, non-nil slice so "no rows" encodes as [] in JSON.
 	items := []map[string]interface{}{}
 	for rows.Next() {
 		var id, contentID, authorID uuid.UUID
 		var cType, snippet, aiProvider, dec string
 		var flagReason, orDecision, feedbackReason *string
 		var feedbackBy *uuid.UUID
 		var feedbackCorrect *bool
 		var feedbackAt *time.Time
 		var scoresHate, scoresGreed, scoresDelusion float64
 		var rawScores, orScores []byte
 		var createdAt time.Time
 		var authorHandle, authorDisplayName string

 		if err := rows.Scan(&id, &cType, &contentID, &authorID, &snippet,
 			&aiProvider, &dec, &flagReason,
 			&scoresHate, &scoresGreed, &scoresDelusion, &rawScores,
 			&orDecision, &orScores,
 			&feedbackCorrect, &feedbackReason, &feedbackBy, &feedbackAt,
 			&createdAt,
 			&authorHandle, &authorDisplayName,
 		); err != nil {
 			// Surface the failure instead of returning a silently shortened page.
 			return nil, 0, fmt.Errorf("failed to scan ai moderation log row: %w", err)
 		}

 		items = append(items, map[string]interface{}{
 			"id":                  id,
 			"content_type":        cType,
 			"content_id":          contentID,
 			"author_id":           authorID,
 			"content_snippet":     snippet,
 			"ai_provider":         aiProvider,
 			"decision":            dec,
 			"flag_reason":         flagReason,
 			"scores_hate":         scoresHate,
 			"scores_greed":        scoresGreed,
 			"scores_delusion":     scoresDelusion,
 			"raw_scores":          json.RawMessage(rawScores),
 			"or_decision":         orDecision,
 			"or_scores":           json.RawMessage(orScores),
 			"feedback_correct":    feedbackCorrect,
 			"feedback_reason":     feedbackReason,
 			"feedback_by":         feedbackBy,
 			"feedback_at":         feedbackAt,
 			"created_at":          createdAt,
 			"author_handle":       authorHandle,
 			"author_display_name": authorDisplayName,
 		})
 	}
 	// rows.Next returning false can also mean iteration failed part-way.
 	if err := rows.Err(); err != nil {
 		return nil, 0, fmt.Errorf("failed to read ai moderation log rows: %w", err)
 	}

 	return items, total, nil
 }
 // SubmitAIFeedback records admin training feedback on an AI moderation decision.
 //
 // It sets feedback_correct, feedback_reason and feedback_by on the
 // ai_moderation_log row identified by logID and stamps feedback_at with the
 // database's current time. Feedback already present on the row is overwritten.
 //
 // Returns an error when the UPDATE fails or when no row has the given ID, so
 // feedback on a nonexistent entry is not reported as a success.
 func (s *ModerationService) SubmitAIFeedback(ctx context.Context, logID uuid.UUID, correct bool, reason string, adminID uuid.UUID) error {
 	tag, err := s.pool.Exec(ctx, `
 		UPDATE ai_moderation_log 
 		SET feedback_correct = $1, feedback_reason = $2, feedback_by = $3, feedback_at = NOW()
 		WHERE id = $4
 	`, correct, reason, adminID, logID)
 	if err != nil {
 		return fmt.Errorf("failed to submit AI feedback: %w", err)
 	}
 	// An UPDATE that matches nothing is not a database error; detect it here.
 	if tag.RowsAffected() == 0 {
 		return fmt.Errorf("failed to submit AI feedback: ai moderation log entry %s not found", logID)
 	}
 	return nil
 }
 // GetAITrainingData exports all reviewed feedback entries for fine-tuning.
 //
 // It returns every ai_moderation_log row whose feedback_correct is set,
 // ordered by feedback_at descending. Each entry carries the stored content
 // snippet, the AI decision and flag reason, the three scores, and the admin's
 // verdict and reason.
 //
 // The returned slice is never nil on success, so an empty export serializes
 // as a JSON array rather than null. A failed query, row scan, or row
 // iteration is returned as an error instead of yielding a partial export.
 func (s *ModerationService) GetAITrainingData(ctx context.Context) ([]map[string]interface{}, error) {
 	rows, err := s.pool.Query(ctx, `
 		SELECT content_snippet, decision, flag_reason, scores_hate, scores_greed, scores_delusion,
 		       feedback_correct, feedback_reason
 		FROM ai_moderation_log
 		WHERE feedback_correct IS NOT NULL
 		ORDER BY feedback_at DESC
 	`)
 	if err != nil {
 		return nil, fmt.Errorf("failed to query ai training data: %w", err)
 	}
 	defer rows.Close()

 	items := []map[string]interface{}{}
 	for rows.Next() {
 		var snippet, decision string
 		var flagReason, feedbackReason *string
 		var hate, greed, delusion float64
 		// Safe as a plain bool: the WHERE clause excludes NULL feedback_correct.
 		var correct bool

 		if err := rows.Scan(&snippet, &decision, &flagReason, &hate, &greed, &delusion, &correct, &feedbackReason); err != nil {
 			// Silently dropping rows would make the training set incomplete
 			// without anyone noticing.
 			return nil, fmt.Errorf("failed to scan ai training data row: %w", err)
 		}

 		items = append(items, map[string]interface{}{
 			"content":         snippet,
 			"ai_decision":     decision,
 			"ai_flag_reason":  flagReason,
 			"scores":          map[string]float64{"hate": hate, "greed": greed, "delusion": delusion},
 			"correct":         correct,
 			"feedback_reason": feedbackReason,
 		})
 	}
 	// rows.Next returning false can also mean iteration failed part-way.
 	if err := rows.Err(); err != nil {
 		return nil, fmt.Errorf("failed to read ai training data rows: %w", err)
 	}

 	return items, nil
 }
 func containsAny(body string, terms []string) bool {
 	// Case insensitive check
 	lower := bytes.ToLower([]byte(body))