feat: switch news RSS to Google News site: format with redirect resolution

This commit is contained in:
Patrick Britton 2026-02-08 13:02:57 -06:00
parent 3c74457af1
commit d623320256
2 changed files with 90 additions and 21 deletions

View file

@ -62,9 +62,9 @@ function ModelSelector({ value, onChange, className }: { value: string; onChange
} }
const DEFAULT_NEWS_SOURCES = [ const DEFAULT_NEWS_SOURCES = [
{ name: 'NPR', rss_url: 'https://feeds.npr.org/1001/rss.xml', enabled: true }, { name: 'NPR', site: 'npr.org', enabled: true },
{ name: 'AP News', rss_url: 'https://rsshub.app/apnews/topics/apf-topnews', enabled: true }, { name: 'AP News', site: 'apnews.com', enabled: true },
{ name: 'Bring Me The News', rss_url: 'https://bringmethenews.com/feed', enabled: true }, { name: 'Bring Me The News', site: 'bringmethenews.com', enabled: true },
]; ];
const DEFAULT_NEWS_PROMPT = `You are a news curator for Sojorn, a social media platform. Your job is to write brief, engaging social media posts about news articles. const DEFAULT_NEWS_PROMPT = `You are a news curator for Sojorn, a social media platform. Your job is to write brief, engaging social media posts about news articles.
@ -91,7 +91,8 @@ Rules:
interface NewsSource { interface NewsSource {
name: string; name: string;
rss_url: string; site?: string;
rss_url?: string;
enabled: boolean; enabled: boolean;
} }
@ -351,7 +352,8 @@ export default function OfficialAccountsPage() {
<div key={i} className="flex items-center gap-2 text-xs"> <div key={i} className="flex items-center gap-2 text-xs">
<span className={`w-2 h-2 rounded-full ${src.enabled ? 'bg-green-500' : 'bg-gray-300'}`} /> <span className={`w-2 h-2 rounded-full ${src.enabled ? 'bg-green-500' : 'bg-gray-300'}`} />
<span className="font-medium">{src.name}</span> <span className="font-medium">{src.name}</span>
<a href={src.rss_url} target="_blank" className="text-brand-500 hover:underline flex items-center gap-1"> <span className="text-gray-400 font-mono">{src.site || src.rss_url}</span>
<a href={src.site ? `https://news.google.com/rss/search?q=site:${src.site}&hl=en-US&gl=US&ceid=US:en` : src.rss_url} target="_blank" className="text-brand-500 hover:underline flex items-center gap-1">
RSS <ExternalLink className="w-3 h-3" /> RSS <ExternalLink className="w-3 h-3" />
</a> </a>
</div> </div>
@ -489,14 +491,14 @@ function CreateAccountForm({ onDone, initialProfile }: { onDone: () => void; ini
<input type="text" value={src.name} placeholder="Name" <input type="text" value={src.name} placeholder="Name"
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], name: e.target.value }; setNewsSources(n); }} onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], name: e.target.value }; setNewsSources(n); }}
className="w-32 px-2 py-1 border border-warm-300 rounded text-sm" /> className="w-32 px-2 py-1 border border-warm-300 rounded text-sm" />
<input type="text" value={src.rss_url} placeholder="RSS URL" <input type="text" value={src.site || ''} placeholder="site domain (e.g. npr.org)"
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], rss_url: e.target.value }; setNewsSources(n); }} onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], site: e.target.value }; setNewsSources(n); }}
className="flex-1 px-2 py-1 border border-warm-300 rounded text-sm font-mono" /> className="flex-1 px-2 py-1 border border-warm-300 rounded text-sm font-mono" />
<button type="button" onClick={() => setNewsSources(newsSources.filter((_, j) => j !== i))} <button type="button" onClick={() => setNewsSources(newsSources.filter((_, j) => j !== i))}
className="text-red-500 hover:text-red-700 text-sm"></button> className="text-red-500 hover:text-red-700 text-sm"></button>
</div> </div>
))} ))}
<button type="button" onClick={() => setNewsSources([...newsSources, { name: '', rss_url: '', enabled: true }])} <button type="button" onClick={() => setNewsSources([...newsSources, { name: '', site: '', enabled: true }])}
className="text-sm text-brand-500 hover:text-brand-600 flex items-center gap-1 mt-1"> className="text-sm text-brand-500 hover:text-brand-600 flex items-center gap-1 mt-1">
<Plus className="w-3 h-3" /> Add Source <Plus className="w-3 h-3" /> Add Source
</button> </button>
@ -601,14 +603,14 @@ function EditAccountForm({ config, onDone }: { config: Config; onDone: () => voi
<input type="text" value={src.name} <input type="text" value={src.name}
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], name: e.target.value }; setNewsSources(n); }} onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], name: e.target.value }; setNewsSources(n); }}
className="w-28 px-2 py-1 border border-warm-300 rounded text-xs" /> className="w-28 px-2 py-1 border border-warm-300 rounded text-xs" />
<input type="text" value={src.rss_url} <input type="text" value={src.site || ''}
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], rss_url: e.target.value }; setNewsSources(n); }} onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], site: e.target.value }; setNewsSources(n); }}
className="flex-1 px-2 py-1 border border-warm-300 rounded text-xs font-mono" /> className="flex-1 px-2 py-1 border border-warm-300 rounded text-xs font-mono" />
<button type="button" onClick={() => setNewsSources(newsSources.filter((_, j) => j !== i))} <button type="button" onClick={() => setNewsSources(newsSources.filter((_, j) => j !== i))}
className="text-red-500 text-xs"></button> className="text-red-500 text-xs"></button>
</div> </div>
))} ))}
<button type="button" onClick={() => setNewsSources([...newsSources, { name: '', rss_url: '', enabled: true }])} <button type="button" onClick={() => setNewsSources([...newsSources, { name: '', site: '', enabled: true }])}
className="text-xs text-brand-500 hover:text-brand-600 flex items-center gap-1 mt-1"> className="text-xs text-brand-500 hover:text-brand-600 flex items-center gap-1 mt-1">
<Plus className="w-3 h-3" /> Add <Plus className="w-3 h-3" /> Add
</button> </button>

View file

@ -43,13 +43,29 @@ type OfficialAccountConfig struct {
AvatarURL string `json:"avatar_url,omitempty"` AvatarURL string `json:"avatar_url,omitempty"`
} }
// NewsSource represents a single RSS feed configuration // NewsSource represents a single RSS feed configuration.
// If Site is set, the Google News RSS URL is auto-constructed.
// If RSSURL is set directly, it's used as-is (legacy/fallback).
type NewsSource struct { type NewsSource struct {
Name string `json:"name"` Name string `json:"name"`
RSSURL string `json:"rss_url"` Site string `json:"site,omitempty"`
RSSURL string `json:"rss_url,omitempty"`
Enabled bool `json:"enabled"` Enabled bool `json:"enabled"`
} }
// GoogleNewsRSSURL builds a Google News RSS search URL restricted to the given
// site domain (for example "npr.org"), using the en-US / US edition parameters.
//
// Leading and trailing whitespace is stripped from site before it is placed in
// the query, since a stray space or newline would otherwise yield a malformed
// request URL.
//
// NOTE(review): site is not percent-encoded; a value containing '&' or '#'
// would alter the query string. Callers are expected to pass a bare domain.
func GoogleNewsRSSURL(site string) string {
	site = strings.TrimSpace(site)
	return fmt.Sprintf("https://news.google.com/rss/search?q=site:%s&hl=en-US&gl=US&ceid=US:en", site)
}
// EffectiveRSSURL reports which feed URL should be fetched for this source.
// A source without a Site falls back to its explicitly configured RSSURL;
// otherwise the URL is derived from Site via GoogleNewsRSSURL.
func (ns *NewsSource) EffectiveRSSURL() string {
	if ns.Site == "" {
		// Legacy configuration: use the stored feed URL as-is (may be empty).
		return ns.RSSURL
	}
	return GoogleNewsRSSURL(ns.Site)
}
// RSSFeed represents a parsed RSS feed // RSSFeed represents a parsed RSS feed
type RSSFeed struct { type RSSFeed struct {
Channel struct { Channel struct {
@ -60,11 +76,18 @@ type RSSFeed struct {
// RSSItem represents a single RSS item // RSSItem represents a single RSS item
type RSSItem struct { type RSSItem struct {
Title string `xml:"title"` Title string `xml:"title" json:"title"`
Link string `xml:"link"` Link string `xml:"link" json:"link"`
Description string `xml:"description"` Description string `xml:"description" json:"description"`
PubDate string `xml:"pubDate"` PubDate string `xml:"pubDate" json:"pub_date"`
GUID string `xml:"guid"` GUID string `xml:"guid" json:"guid"`
Source RSSSource `xml:"source" json:"source"`
}
// RSSSource mirrors the <source> child element of an RSS item, as emitted by
// Google News feeds.
type RSSSource struct {
	// URL is read from the element's "url" attribute.
	URL string `xml:"url,attr" json:"url"`
	// Name is the element's character data.
	Name string `xml:",chardata" json:"name"`
}
// PostedArticle represents a previously posted article // PostedArticle represents a previously posted article
@ -212,7 +235,7 @@ func (s *OfficialAccountsService) FetchRSS(ctx context.Context, rssURL string) (
if err != nil { if err != nil {
return nil, err return nil, err
} }
req.Header.Set("User-Agent", "Sojorn/1.0 (News Aggregator)") req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Sojorn/1.0)")
resp, err := s.httpClient.Do(req) resp, err := s.httpClient.Do(req)
if err != nil { if err != nil {
@ -234,9 +257,52 @@ func (s *OfficialAccountsService) FetchRSS(ctx context.Context, rssURL string) (
return nil, fmt.Errorf("failed to parse RSS from %s: %w", rssURL, err) return nil, fmt.Errorf("failed to parse RSS from %s: %w", rssURL, err)
} }
// If items come from Google News, resolve redirect links to actual article URLs
isGoogleNews := strings.Contains(rssURL, "news.google.com/rss")
if isGoogleNews {
for i := range feed.Channel.Items {
resolved := s.resolveGoogleNewsLink(feed.Channel.Items[i].Link)
if resolved != "" {
feed.Channel.Items[i].Link = resolved
}
}
}
return feed.Channel.Items, nil return feed.Channel.Items, nil
} }
// resolveGoogleNewsLink issues a single GET to a Google News link without
// following redirects and returns the redirect target as the article URL.
//
// The input is returned unchanged when it is empty, does not contain
// "news.google.com", the request fails, or the response is not a 3xx carrying
// a usable Location header. Links that Google redirects via JavaScript rather
// than HTTP are therefore left as-is; the response body is not inspected.
func (s *OfficialAccountsService) resolveGoogleNewsLink(googleURL string) string {
	if googleURL == "" || !strings.Contains(googleURL, "news.google.com") {
		return googleURL
	}

	// Stop at the first response so the redirect target can be read from the
	// Location header instead of being followed.
	noRedirectClient := &http.Client{
		Timeout: 10 * time.Second,
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}

	resp, err := noRedirectClient.Get(googleURL)
	if err != nil {
		log.Debug().Err(err).Str("url", googleURL).Msg("Failed to resolve Google News link")
		return googleURL
	}
	defer resp.Body.Close()

	if resp.StatusCode < 300 || resp.StatusCode >= 400 {
		// Not an HTTP redirect; keep the original link.
		return googleURL
	}

	// Location() resolves a relative Location header against the request URL,
	// so the caller always receives an absolute link. It fails when the header
	// is missing or unparseable, in which case the original link is kept.
	target, err := resp.Location()
	if err != nil {
		return googleURL
	}
	return target.String()
}
// FetchNewArticles fetches new articles from all enabled news sources for a config, // FetchNewArticles fetches new articles from all enabled news sources for a config,
// filtering out already-posted articles. // filtering out already-posted articles.
func (s *OfficialAccountsService) FetchNewArticles(ctx context.Context, configID string) ([]RSSItem, []string, error) { func (s *OfficialAccountsService) FetchNewArticles(ctx context.Context, configID string) ([]RSSItem, []string, error) {
@ -254,10 +320,11 @@ func (s *OfficialAccountsService) FetchNewArticles(ctx context.Context, configID
var sourceNames []string var sourceNames []string
for _, src := range sources { for _, src := range sources {
if !src.Enabled || src.RSSURL == "" { rssURL := src.EffectiveRSSURL()
if !src.Enabled || rssURL == "" {
continue continue
} }
items, err := s.FetchRSS(ctx, src.RSSURL) items, err := s.FetchRSS(ctx, rssURL)
if err != nil { if err != nil {
log.Warn().Err(err).Str("source", src.Name).Msg("Failed to fetch RSS feed") log.Warn().Err(err).Str("source", src.Name).Msg("Failed to fetch RSS feed")
continue continue