fix: use GUID for Google News article dedup, fall back to RSS source URL when link resolution fails

This commit is contained in:
Patrick Britton 2026-02-08 19:08:48 -06:00
parent 70261d839b
commit da5a366cc1

View file

@@ -262,9 +262,19 @@ func (s *OfficialAccountsService) FetchRSS(ctx context.Context, rssURL string) (
isGoogleNews := strings.Contains(rssURL, "news.google.com/rss") isGoogleNews := strings.Contains(rssURL, "news.google.com/rss")
if isGoogleNews { if isGoogleNews {
for i := range feed.Channel.Items { for i := range feed.Channel.Items {
resolved := s.resolveGoogleNewsLink(feed.Channel.Items[i].Link) item := &feed.Channel.Items[i]
if resolved != "" { // Preserve original Google News URL in GUID for dedup
feed.Channel.Items[i].Link = resolved if item.GUID == "" {
item.GUID = item.Link
}
resolved := ResolveGoogleNewsURL(item.Link)
if resolved != item.Link && resolved != "" {
// Base64 decode succeeded — use real article URL
item.Link = resolved
} else if item.Source.URL != "" {
// Fall back to the <source url="..."> from the RSS item
item.Link = item.Source.URL
log.Debug().Str("source", item.Source.Name).Str("url", item.Source.URL).Msg("Using RSS source URL as fallback for Google News link")
} }
} }
} }
@@ -409,9 +419,10 @@ func (s *OfficialAccountsService) FetchNewArticles(ctx context.Context, configID
var newItems []RSSItem var newItems []RSSItem
var newSourceNames []string var newSourceNames []string
for i, item := range allItems { for i, item := range allItems {
link := item.Link // Use GUID (original Google News URL) for dedup — Link may be a source homepage
link := item.GUID
if link == "" { if link == "" {
link = item.GUID link = item.Link
} }
if link == "" { if link == "" {
continue continue
@@ -517,9 +528,10 @@ func (s *OfficialAccountsService) CreatePostForAccount(ctx context.Context, conf
// Track article if this was a news post // Track article if this was a news post
if article != nil { if article != nil {
link := article.Link // Use GUID (original Google News URL) for dedup tracking, not Link (may be source homepage)
link := article.GUID
if link == "" { if link == "" {
link = article.GUID link = article.Link
} }
postIDStr := postID.String() postIDStr := postID.String()
_, _ = tx.Exec(ctx, ` _, _ = tx.Exec(ctx, `