diff --git a/go-backend/cmd/api/main.go b/go-backend/cmd/api/main.go index c885956..25271b5 100644 --- a/go-backend/cmd/api/main.go +++ b/go-backend/cmd/api/main.go @@ -127,11 +127,14 @@ func main() { // Initialize content filter (hard blocklist + strike system) contentFilter := services.NewContentFilter(dbPool) + // Initialize link preview service + linkPreviewService := services.NewLinkPreviewService(dbPool) + hub := realtime.NewHub() wsHandler := handlers.NewWSHandler(hub, cfg.JWTSecret) userHandler := handlers.NewUserHandler(userRepo, postRepo, notificationService, assetService) - postHandler := handlers.NewPostHandler(postRepo, userRepo, feedService, assetService, notificationService, moderationService, contentFilter, openRouterService) + postHandler := handlers.NewPostHandler(postRepo, userRepo, feedService, assetService, notificationService, moderationService, contentFilter, openRouterService, linkPreviewService) chatHandler := handlers.NewChatHandler(chatRepo, notificationService, hub) authHandler := handlers.NewAuthHandler(userRepo, cfg, emailService, sendPulseService) categoryHandler := handlers.NewCategoryHandler(categoryRepo) diff --git a/go-backend/internal/database/migrations/20260208000002_link_previews.up.sql b/go-backend/internal/database/migrations/20260208000002_link_previews.up.sql new file mode 100644 index 0000000..932cfd9 --- /dev/null +++ b/go-backend/internal/database/migrations/20260208000002_link_previews.up.sql @@ -0,0 +1,10 @@ +-- Add link preview columns to posts table +ALTER TABLE public.posts + ADD COLUMN IF NOT EXISTS link_preview_url TEXT, + ADD COLUMN IF NOT EXISTS link_preview_title TEXT, + ADD COLUMN IF NOT EXISTS link_preview_description TEXT, + ADD COLUMN IF NOT EXISTS link_preview_image_url TEXT, + ADD COLUMN IF NOT EXISTS link_preview_site_name TEXT; + +-- Index for quick lookups when enriching posts +CREATE INDEX IF NOT EXISTS idx_posts_link_preview ON public.posts (id) WHERE link_preview_url IS NOT NULL; diff --git a/go-backend/internal/handlers/post_handler.go b/go-backend/internal/handlers/post_handler.go index 41ef4b0..1f1ad19 100644 --- a/go-backend/internal/handlers/post_handler.go +++ b/go-backend/internal/handlers/post_handler.go @@ -25,9 +25,10 @@ type PostHandler struct { moderationService *services.ModerationService contentFilter *services.ContentFilter openRouterService *services.OpenRouterService + linkPreviewService *services.LinkPreviewService } -func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.UserRepository, feedService *services.FeedService, assetService *services.AssetService, notificationService *services.NotificationService, moderationService *services.ModerationService, contentFilter *services.ContentFilter, openRouterService *services.OpenRouterService) *PostHandler { +func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.UserRepository, feedService *services.FeedService, assetService *services.AssetService, notificationService *services.NotificationService, moderationService *services.ModerationService, contentFilter *services.ContentFilter, openRouterService *services.OpenRouterService, linkPreviewService *services.LinkPreviewService) *PostHandler { return &PostHandler{ postRepo: postRepo, userRepo: userRepo, @@ -37,6 +38,49 @@ func NewPostHandler(postRepo *repository.PostRepository, userRepo *repository.Us moderationService: moderationService, contentFilter: contentFilter, openRouterService: openRouterService, + linkPreviewService: linkPreviewService, + } +} + +// enrichLinkPreviews populates link_preview fields on a slice of posts via batch query. +func (h *PostHandler) enrichLinkPreviews(ctx context.Context, posts []models.Post) { + if h.linkPreviewService == nil || len(posts) == 0 { + return + } + ids := make([]string, len(posts)) + for i, p := range posts { + ids[i] = p.ID.String() + } + previews, err := h.linkPreviewService.EnrichPostsWithLinkPreviews(ctx, ids) + if err != nil || len(previews) == 0 { + return + } + for i := range posts { + if lp, ok := previews[posts[i].ID.String()]; ok { + posts[i].LinkPreviewURL = &lp.URL + posts[i].LinkPreviewTitle = &lp.Title + posts[i].LinkPreviewDescription = &lp.Description + posts[i].LinkPreviewImageURL = &lp.ImageURL + posts[i].LinkPreviewSiteName = &lp.SiteName + } + } +} + +// enrichSinglePostLinkPreview populates link_preview fields on a single post. +func (h *PostHandler) enrichSinglePostLinkPreview(ctx context.Context, post *models.Post) { + if h.linkPreviewService == nil || post == nil { + return + } + previews, err := h.linkPreviewService.EnrichPostsWithLinkPreviews(ctx, []string{post.ID.String()}) + if err != nil || len(previews) == 0 { + return + } + if lp, ok := previews[post.ID.String()]; ok { + post.LinkPreviewURL = &lp.URL + post.LinkPreviewTitle = &lp.Title + post.LinkPreviewDescription = &lp.Description + post.LinkPreviewImageURL = &lp.ImageURL + post.LinkPreviewSiteName = &lp.SiteName } } @@ -531,6 +575,24 @@ func (h *PostHandler) CreatePost(c *gin.Context) { h.moderationService.LogAIDecision(c.Request.Context(), "post", post.ID, userID, req.Body, scores, nil, decision, flagReason, orDecision, nil) } + // Auto-extract link preview from post body (async — don't block response) + if h.linkPreviewService != nil { + go func() { + ctx := context.Background() + linkURL := services.ExtractFirstURL(req.Body) + if linkURL != "" { + // Check if author is an official account (trusted = skip safety checks) + var isOfficial bool + _ = h.postRepo.Pool().QueryRow(ctx, `SELECT COALESCE(is_official, false) FROM profiles WHERE id = $1`, userID).Scan(&isOfficial) + + lp, err := h.linkPreviewService.FetchPreview(ctx, linkURL, isOfficial) + if err == nil && lp != nil { + _ = h.linkPreviewService.SaveLinkPreview(ctx, post.ID.String(), lp) + } + } + }() + } + // Check for @mentions and notify mentioned users go func() { if h.notificationService != nil && strings.Contains(req.Body, "@") { @@ -569,6 +631,7 @@ func (h *PostHandler) GetFeed(c *gin.Context) { return } + h.enrichLinkPreviews(c.Request.Context(), posts) c.JSON(http.StatusOK, gin.H{"posts": posts}) } @@ -602,6 +665,7 @@ func (h *PostHandler) GetProfilePosts(c *gin.Context) { return } + h.enrichLinkPreviews(c.Request.Context(), posts) c.JSON(http.StatusOK, gin.H{"posts": posts}) } @@ -636,6 +700,7 @@ func (h *PostHandler) GetPost(c *gin.Context) { post.ThumbnailURL = &signed } + h.enrichSinglePostLinkPreview(c.Request.Context(), post) c.JSON(http.StatusOK, gin.H{"post": post}) } @@ -863,6 +928,7 @@ func (h *PostHandler) GetSavedPosts(c *gin.Context) { return } + h.enrichLinkPreviews(c.Request.Context(), posts) c.JSON(http.StatusOK, gin.H{"posts": posts}) } @@ -897,6 +963,7 @@ func (h *PostHandler) GetLikedPosts(c *gin.Context) { return } + h.enrichLinkPreviews(c.Request.Context(), posts) c.JSON(http.StatusOK, gin.H{"posts": posts}) } @@ -933,6 +1000,7 @@ func (h *PostHandler) GetPostChain(c *gin.Context) { } } + h.enrichLinkPreviews(c.Request.Context(), posts) c.JSON(http.StatusOK, gin.H{"posts": posts}) } @@ -958,6 +1026,13 @@ func (h *PostHandler) GetPostFocusContext(c *gin.Context) { h.signPostMedia(&focusContext.Children[i]) } + // Enrich link previews for all posts in focus context + h.enrichSinglePostLinkPreview(c.Request.Context(), focusContext.TargetPost) + h.enrichSinglePostLinkPreview(c.Request.Context(), focusContext.ParentPost) + for i := range focusContext.Children { + h.enrichSinglePostLinkPreview(c.Request.Context(), &focusContext.Children[i]) + } + c.JSON(http.StatusOK, focusContext) } diff --git a/go-backend/internal/models/post.go b/go-backend/internal/models/post.go index f552d8b..7184c5d 100644 --- a/go-backend/internal/models/post.go +++ b/go-backend/internal/models/post.go @@ -34,9 +34,16 @@ type Post struct { IsNSFW bool `json:"is_nsfw" db:"is_nsfw"` NSFWReason string `json:"nsfw_reason" db:"nsfw_reason"` ExpiresAt *time.Time `json:"expires_at" db:"expires_at"` - CreatedAt time.Time `json:"created_at" db:"created_at"` - EditedAt *time.Time `json:"edited_at,omitempty" db:"edited_at"` - DeletedAt *time.Time `json:"deleted_at,omitempty" db:"deleted_at"` + + // Link preview (populated via enrichment, not in every query) + LinkPreviewURL *string `json:"link_preview_url,omitempty" db:"link_preview_url"` + LinkPreviewTitle *string `json:"link_preview_title,omitempty" db:"link_preview_title"` + LinkPreviewDescription *string `json:"link_preview_description,omitempty" db:"link_preview_description"` + LinkPreviewImageURL *string `json:"link_preview_image_url,omitempty" db:"link_preview_image_url"` + LinkPreviewSiteName *string `json:"link_preview_site_name,omitempty" db:"link_preview_site_name"` + CreatedAt time.Time `json:"created_at" db:"created_at"` + EditedAt *time.Time `json:"edited_at,omitempty" db:"edited_at"` + DeletedAt *time.Time `json:"deleted_at,omitempty" db:"deleted_at"` // Joined fields (Scan targets) AuthorHandle string `json:"-" db:"author_handle"` diff --git a/go-backend/internal/services/link_preview_service.go b/go-backend/internal/services/link_preview_service.go new file mode 100644 index 0000000..693d669 --- /dev/null +++ b/go-backend/internal/services/link_preview_service.go @@ -0,0 +1,322 @@ +package services + +import ( + "context" + "fmt" + "io" + "net" + "net/http" + "net/url" + "regexp" + "strings" + "time" + + "github.com/jackc/pgx/v5/pgxpool" + "github.com/rs/zerolog/log" +) + +// LinkPreview represents the OG metadata extracted from a URL. +type LinkPreview struct { + URL string `json:"link_preview_url"` + Title string `json:"link_preview_title"` + Description string `json:"link_preview_description"` + ImageURL string `json:"link_preview_image_url"` + SiteName string `json:"link_preview_site_name"` +} + +// LinkPreviewService fetches and parses OpenGraph metadata from URLs. +type LinkPreviewService struct { + pool *pgxpool.Pool + httpClient *http.Client +} + +func NewLinkPreviewService(pool *pgxpool.Pool) *LinkPreviewService { + return &LinkPreviewService{ + pool: pool, + httpClient: &http.Client{ + Timeout: 8 * time.Second, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 5 { + return fmt.Errorf("too many redirects") + } + return nil + }, + }, + } +} + +// blockedIPRanges are private/internal IP ranges that untrusted URLs must not resolve to. +var blockedIPRanges = []string{ + "127.0.0.0/8", + "10.0.0.0/8", + "172.16.0.0/12", + "192.168.0.0/16", + "169.254.0.0/16", + "::1/128", + "fc00::/7", + "fe80::/10", +} + +var blockedNets []*net.IPNet + +func init() { + for _, cidr := range blockedIPRanges { + _, ipNet, err := net.ParseCIDR(cidr) + if err == nil { + blockedNets = append(blockedNets, ipNet) + } + } +} + +func isPrivateIP(ip net.IP) bool { + for _, n := range blockedNets { + if n.Contains(ip) { + return true + } + } + return false +} + +// ExtractFirstURL finds the first http/https URL in a text string. +func ExtractFirstURL(text string) string { + re := regexp.MustCompile(`https?://[^\s<>"')\]]+`) + match := re.FindString(text) + // Clean trailing punctuation that's not part of the URL + match = strings.TrimRight(match, ".,;:!?") + return match +} + +// FetchPreview fetches OG metadata from a URL. +// If trusted is false, performs safety checks (no internal IPs, domain validation). +func (s *LinkPreviewService) FetchPreview(ctx context.Context, rawURL string, trusted bool) (*LinkPreview, error) { + if rawURL == "" { + return nil, fmt.Errorf("empty URL") + } + + parsed, err := url.Parse(rawURL) + if err != nil { + return nil, fmt.Errorf("invalid URL: %w", err) + } + + if parsed.Scheme != "http" && parsed.Scheme != "https" { + return nil, fmt.Errorf("unsupported scheme: %s", parsed.Scheme) + } + + // Safety checks for untrusted URLs + if !trusted { + if err := s.validateURL(parsed); err != nil { + return nil, fmt.Errorf("unsafe URL: %w", err) + } + } + + req, err := http.NewRequestWithContext(ctx, "GET", rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Sojorn/1.0; +https://sojorn.net)") + req.Header.Set("Accept", "text/html") + + resp, err := s.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("fetch failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d", resp.StatusCode) + } + + ct := resp.Header.Get("Content-Type") + if !strings.Contains(ct, "text/html") && !strings.Contains(ct, "application/xhtml") { + return nil, fmt.Errorf("not HTML: %s", ct) + } + + // Read max 1MB + limited := io.LimitReader(resp.Body, 1*1024*1024) + body, err := io.ReadAll(limited) + if err != nil { + return nil, err + } + + preview := s.parseOGTags(string(body), rawURL) + if preview.Title == "" && preview.Description == "" && preview.ImageURL == "" { + return nil, fmt.Errorf("no OG metadata found") + } + + preview.URL = rawURL + if preview.SiteName == "" { + preview.SiteName = parsed.Hostname() + } + + return preview, nil +} + +// validateURL checks that an untrusted URL doesn't point to internal resources. +func (s *LinkPreviewService) validateURL(u *url.URL) error { + host := u.Hostname() + + // Block bare IPs for untrusted requests + if ip := net.ParseIP(host); ip != nil { + if isPrivateIP(ip) { + return fmt.Errorf("private IP not allowed") + } + } + + // Resolve DNS and check all IPs + ips, err := net.LookupIP(host) + if err != nil { + return fmt.Errorf("DNS lookup failed: %w", err) + } + for _, ip := range ips { + if isPrivateIP(ip) { + return fmt.Errorf("resolves to private IP") + } + } + + return nil +} + +// parseOGTags extracts OpenGraph meta tags from raw HTML. +func (s *LinkPreviewService) parseOGTags(html string, sourceURL string) *LinkPreview { + preview := &LinkPreview{} + + // Use regex to extract meta tags — lightweight, no dependency needed + metaRe := regexp.MustCompile(`(?i)]*>`) + metas := metaRe.FindAllString(html, -1) + + for _, tag := range metas { + prop := extractAttr(tag, "property") + if prop == "" { + prop = extractAttr(tag, "name") + } + content := extractAttr(tag, "content") + if content == "" { + continue + } + + switch strings.ToLower(prop) { + case "og:title": + if preview.Title == "" { + preview.Title = content + } + case "og:description": + if preview.Description == "" { + preview.Description = content + } + case "og:image": + if preview.ImageURL == "" { + preview.ImageURL = resolveImageURL(content, sourceURL) + } + case "og:site_name": + if preview.SiteName == "" { + preview.SiteName = content + } + case "description": + // Fallback if no og:description + if preview.Description == "" { + preview.Description = content + } + } + } + + // Fallback: try