fix: use GUID for Google News article dedup, fall back to RSS source URL when link resolution fails

This commit is contained in:
Patrick Britton 2026-02-08 19:08:48 -06:00
parent 70261d839b
commit da5a366cc1

View file

@ -262,9 +262,19 @@ func (s *OfficialAccountsService) FetchRSS(ctx context.Context, rssURL string) (
isGoogleNews := strings.Contains(rssURL, "news.google.com/rss")
if isGoogleNews {
for i := range feed.Channel.Items {
resolved := s.resolveGoogleNewsLink(feed.Channel.Items[i].Link)
if resolved != "" {
feed.Channel.Items[i].Link = resolved
item := &feed.Channel.Items[i]
// Preserve original Google News URL in GUID for dedup
if item.GUID == "" {
item.GUID = item.Link
}
resolved := ResolveGoogleNewsURL(item.Link)
if resolved != item.Link && resolved != "" {
// Base64 decode succeeded — use real article URL
item.Link = resolved
} else if item.Source.URL != "" {
// Fall back to the <source url="..."> from the RSS item
item.Link = item.Source.URL
log.Debug().Str("source", item.Source.Name).Str("url", item.Source.URL).Msg("Using RSS source URL as fallback for Google News link")
}
}
}
@ -409,9 +419,10 @@ func (s *OfficialAccountsService) FetchNewArticles(ctx context.Context, configID
var newItems []RSSItem
var newSourceNames []string
for i, item := range allItems {
link := item.Link
// Use GUID (original Google News URL) for dedup — Link may be a source homepage
link := item.GUID
if link == "" {
link = item.GUID
link = item.Link
}
if link == "" {
continue
@ -517,9 +528,10 @@ func (s *OfficialAccountsService) CreatePostForAccount(ctx context.Context, conf
// Track article if this was a news post
if article != nil {
link := article.Link
// Use GUID (original Google News URL) for dedup tracking, not Link (may be source homepage)
link := article.GUID
if link == "" {
link = article.GUID
link = article.Link
}
postIDStr := postID.String()
_, _ = tx.Exec(ctx, `