From 0f7874b429868a27a5442187c29e128c068321a6 Mon Sep 17 00:00:00 2001 From: Patrick Britton Date: Mon, 9 Feb 2026 08:58:56 -0600 Subject: [PATCH] fix: unescape HTML entities in OG tag parsing for link previews --- fix_entities.sql | 15 +++++++++++++++ .../internal/services/link_preview_service.go | 11 ++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 fix_entities.sql diff --git a/fix_entities.sql b/fix_entities.sql new file mode 100644 index 0000000..9b0a922 --- /dev/null +++ b/fix_entities.sql @@ -0,0 +1,15 @@ +-- Fix HTML entities in link preview titles +UPDATE posts SET link_preview_title = REPLACE(link_preview_title, '&#39;', '''') WHERE link_preview_title LIKE '%&#39;%'; +UPDATE posts SET link_preview_title = REPLACE(link_preview_title, '&amp;', '&') WHERE link_preview_title LIKE '%&amp;%'; +UPDATE posts SET link_preview_title = REPLACE(link_preview_title, '&quot;', '"') WHERE link_preview_title LIKE '%&quot;%'; + +-- Fix HTML entities in link preview descriptions +UPDATE posts SET link_preview_description = REPLACE(link_preview_description, '&#39;', '''') WHERE link_preview_description LIKE '%&#39;%'; +UPDATE posts SET link_preview_description = REPLACE(link_preview_description, '&amp;', '&') WHERE link_preview_description LIKE '%&amp;%'; +UPDATE posts SET link_preview_description = REPLACE(link_preview_description, '&quot;', '"') WHERE link_preview_description LIKE '%&quot;%'; + +-- Fix HTML entities in article titles/descriptions +UPDATE official_account_articles SET title = REPLACE(title, '&#39;', '''') WHERE title LIKE '%&#39;%'; +UPDATE official_account_articles SET title = REPLACE(title, '&amp;', '&') WHERE title LIKE '%&amp;%'; +UPDATE official_account_articles SET description = REPLACE(description, '&#39;', '''') WHERE description LIKE '%&#39;%'; +UPDATE official_account_articles SET description = REPLACE(description, '&amp;', '&') WHERE description LIKE '%&amp;%'; diff --git a/go-backend/internal/services/link_preview_service.go b/go-backend/internal/services/link_preview_service.go index 
5409b0a..0727944 100644 --- a/go-backend/internal/services/link_preview_service.go +++ b/go-backend/internal/services/link_preview_service.go @@ -3,6 +3,7 @@ package services import ( "context" "fmt" + "html" "io" "net" "net/http" @@ -182,19 +183,19 @@ func (s *LinkPreviewService) validateURL(u *url.URL) error { } // parseOGTags extracts OpenGraph meta tags from raw HTML. -func (s *LinkPreviewService) parseOGTags(html string, sourceURL string) *LinkPreview { +func (s *LinkPreviewService) parseOGTags(htmlStr string, sourceURL string) *LinkPreview { preview := &LinkPreview{} // Use regex to extract meta tags — lightweight, no dependency needed metaRe := regexp.MustCompile(`(?i)<meta[^>]*>`) - metas := metaRe.FindAllString(html, -1) + metas := metaRe.FindAllString(htmlStr, -1) for _, tag := range metas { prop := extractAttr(tag, "property") if prop == "" { prop = extractAttr(tag, "name") } - content := extractAttr(tag, "content") + content := html.UnescapeString(extractAttr(tag, "content")) if content == "" { continue } @@ -227,8 +228,8 @@ func (s *LinkPreviewService) parseOGTags(html string, sourceURL string) *LinkPre // Fallback: try <title> tag if no og:title if preview.Title == "" { titleRe := regexp.MustCompile(`(?i)<title[^>]*>(.*?)</title>`) - if m := titleRe.FindStringSubmatch(html); len(m) > 1 { - preview.Title = strings.TrimSpace(m[1]) + if m := titleRe.FindStringSubmatch(htmlStr); len(m) > 1 { + preview.Title = html.UnescapeString(strings.TrimSpace(m[1])) } }