From d62332025640a8c8c17490d1197f21107062fe3b Mon Sep 17 00:00:00 2001 From: Patrick Britton Date: Sun, 8 Feb 2026 13:02:57 -0600 Subject: [PATCH] feat: switch news RSS to Google News site: format with redirect resolution --- admin/src/app/official-accounts/page.tsx | 24 ++--- .../services/official_accounts_service.go | 87 ++++++++++++++++--- 2 files changed, 90 insertions(+), 21 deletions(-) diff --git a/admin/src/app/official-accounts/page.tsx b/admin/src/app/official-accounts/page.tsx index aa6f040..0a3c594 100644 --- a/admin/src/app/official-accounts/page.tsx +++ b/admin/src/app/official-accounts/page.tsx @@ -62,9 +62,9 @@ function ModelSelector({ value, onChange, className }: { value: string; onChange } const DEFAULT_NEWS_SOURCES = [ - { name: 'NPR', rss_url: 'https://feeds.npr.org/1001/rss.xml', enabled: true }, - { name: 'AP News', rss_url: 'https://rsshub.app/apnews/topics/apf-topnews', enabled: true }, - { name: 'Bring Me The News', rss_url: 'https://bringmethenews.com/feed', enabled: true }, + { name: 'NPR', site: 'npr.org', enabled: true }, + { name: 'AP News', site: 'apnews.com', enabled: true }, + { name: 'Bring Me The News', site: 'bringmethenews.com', enabled: true }, ]; const DEFAULT_NEWS_PROMPT = `You are a news curator for Sojorn, a social media platform. Your job is to write brief, engaging social media posts about news articles. @@ -91,7 +91,8 @@ Rules: interface NewsSource { name: string; - rss_url: string; + site?: string; + rss_url?: string; enabled: boolean; } @@ -351,7 +352,8 @@ export default function OfficialAccountsPage() {
{src.name} - + {src.site || src.rss_url} + RSS
@@ -489,14 +491,14 @@ function CreateAccountForm({ onDone, initialProfile }: { onDone: () => void; ini { const n = [...newsSources]; n[i] = { ...n[i], name: e.target.value }; setNewsSources(n); }} className="w-32 px-2 py-1 border border-warm-300 rounded text-sm" /> - { const n = [...newsSources]; n[i] = { ...n[i], rss_url: e.target.value }; setNewsSources(n); }} + { const n = [...newsSources]; n[i] = { ...n[i], site: e.target.value }; setNewsSources(n); }} className="flex-1 px-2 py-1 border border-warm-300 rounded text-sm font-mono" /> ))} - @@ -601,14 +603,14 @@ function EditAccountForm({ config, onDone }: { config: Config; onDone: () => voi { const n = [...newsSources]; n[i] = { ...n[i], name: e.target.value }; setNewsSources(n); }} className="w-28 px-2 py-1 border border-warm-300 rounded text-xs" /> - { const n = [...newsSources]; n[i] = { ...n[i], rss_url: e.target.value }; setNewsSources(n); }} + { const n = [...newsSources]; n[i] = { ...n[i], site: e.target.value }; setNewsSources(n); }} className="flex-1 px-2 py-1 border border-warm-300 rounded text-xs font-mono" /> ))} - diff --git a/go-backend/internal/services/official_accounts_service.go b/go-backend/internal/services/official_accounts_service.go index 9fa16f6..fc676c4 100644 --- a/go-backend/internal/services/official_accounts_service.go +++ b/go-backend/internal/services/official_accounts_service.go @@ -43,13 +43,29 @@ type OfficialAccountConfig struct { AvatarURL string `json:"avatar_url,omitempty"` } -// NewsSource represents a single RSS feed configuration +// NewsSource represents a single RSS feed configuration. +// If Site is set, the Google News RSS URL is auto-constructed. +// If RSSURL is set directly, it's used as-is (legacy/fallback). 
type NewsSource struct {
 	Name    string `json:"name"`
-	RSSURL  string `json:"rss_url"`
+	Site    string `json:"site,omitempty"`
+	RSSURL  string `json:"rss_url,omitempty"`
 	Enabled bool   `json:"enabled"`
 }
 
+// GoogleNewsRSSURL builds a Google News RSS search URL for the given site domain (site is inserted unescaped).
+func GoogleNewsRSSURL(site string) string {
+	return fmt.Sprintf("https://news.google.com/rss/search?q=site:%s&hl=en-US&gl=US&ceid=US:en", site)
+}
+
+// EffectiveRSSURL returns the RSS URL to fetch — Google News if Site is non-blank, otherwise RSSURL.
+func (ns *NewsSource) EffectiveRSSURL() string {
+	if site := strings.TrimSpace(ns.Site); site != "" {
+		return GoogleNewsRSSURL(site)
+	}
+	return ns.RSSURL
+}
+
 // RSSFeed represents a parsed RSS feed
 type RSSFeed struct {
 	Channel struct {
@@ -60,11 +76,18 @@ type RSSFeed struct {
 
 // RSSItem represents a single RSS item
 type RSSItem struct {
-	Title       string `xml:"title"`
-	Link        string `xml:"link"`
-	Description string `xml:"description"`
-	PubDate     string `xml:"pubDate"`
-	GUID        string `xml:"guid"`
+	Title       string    `xml:"title" json:"title"`
+	Link        string    `xml:"link" json:"link"`
+	Description string    `xml:"description" json:"description"`
+	PubDate     string    `xml:"pubDate" json:"pub_date"`
+	GUID        string    `xml:"guid" json:"guid"`
+	Source      RSSSource `xml:"source" json:"source"`
+}
+
+// RSSSource represents the <source> element in Google News RSS items.
+type RSSSource struct {
+	URL  string `xml:"url,attr" json:"url"`
+	Name string `xml:",chardata" json:"name"`
 }
 
 // PostedArticle represents a previously posted article
@@ -212,7 +235,7 @@ func (s *OfficialAccountsService) FetchRSS(ctx context.Context, rssURL string) (
 	if err != nil {
 		return nil, err
 	}
-	req.Header.Set("User-Agent", "Sojorn/1.0 (News Aggregator)")
+	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Sojorn/1.0)")
 
 	resp, err := s.httpClient.Do(req)
 	if err != nil {
@@ -234,9 +257,52 @@ func (s *OfficialAccountsService) FetchRSS(ctx context.Context, rssURL string) (
 		return nil, fmt.Errorf("failed to parse RSS from %s: %w", rssURL, err)
 	}
 
+	// For Google News feeds, swap each redirect link for the article URL it points to.
+	// One HTTP round trip per item, so bail out once ctx is done (the resolver never blanks a link).
+	if strings.Contains(rssURL, "news.google.com/rss") {
+		for i := range feed.Channel.Items {
+			if err := ctx.Err(); err != nil {
+				return nil, err
+			}
+			feed.Channel.Items[i].Link = s.resolveGoogleNewsLink(feed.Channel.Items[i].Link)
+		}
+	}
+
 	return feed.Channel.Items, nil
 }
 
+// resolveGoogleNewsLink follows the Google News redirect to get the actual article URL.
+func (s *OfficialAccountsService) resolveGoogleNewsLink(googleURL string) string {
+	if googleURL == "" || !strings.Contains(googleURL, "news.google.com") {
+		return googleURL
+	}
+
+	// Use a client that does NOT follow redirects, so the 3xx response itself can be inspected
+	noRedirectClient := &http.Client{
+		Timeout: 10 * time.Second,
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			return http.ErrUseLastResponse
+		},
+	}
+
+	resp, err := noRedirectClient.Get(googleURL)
+	if err != nil {
+		log.Debug().Err(err).Str("url", googleURL).Msg("Failed to resolve Google News link")
+		return googleURL
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 300 && resp.StatusCode < 400 {
+		// Location() resolves a relative Location header against the request URL.
+		if loc, err := resp.Location(); err == nil {
+			return loc.String()
+		}
+	}
+
+	// No usable Location header (e.g. a JS-based redirect page); the body is not parsed, so keep the original link
+	return googleURL
+}
+
 // FetchNewArticles fetches new articles from all enabled news sources for a config,
 // filtering out already-posted articles.
 func (s *OfficialAccountsService) FetchNewArticles(ctx context.Context, configID string) ([]RSSItem, []string, error) {
@@ -254,10 +320,11 @@ func (s *OfficialAccountsService) FetchNewArticles(ctx context.Context, configID
 	var sourceNames []string
 
 	for _, src := range sources {
-		if !src.Enabled || src.RSSURL == "" {
+		rssURL := src.EffectiveRSSURL()
+		if !src.Enabled || rssURL == "" {
 			continue
 		}
-		items, err := s.FetchRSS(ctx, src.RSSURL)
+		items, err := s.FetchRSS(ctx, rssURL)
 		if err != nil {
 			log.Warn().Err(err).Str("source", src.Name).Msg("Failed to fetch RSS feed")
 			continue