feat: switch news RSS to Google News site: format with redirect resolution
This commit is contained in:
parent
3c74457af1
commit
d623320256
|
|
@ -62,9 +62,9 @@ function ModelSelector({ value, onChange, className }: { value: string; onChange
|
|||
}
|
||||
|
||||
const DEFAULT_NEWS_SOURCES = [
|
||||
{ name: 'NPR', rss_url: 'https://feeds.npr.org/1001/rss.xml', enabled: true },
|
||||
{ name: 'AP News', rss_url: 'https://rsshub.app/apnews/topics/apf-topnews', enabled: true },
|
||||
{ name: 'Bring Me The News', rss_url: 'https://bringmethenews.com/feed', enabled: true },
|
||||
{ name: 'NPR', site: 'npr.org', enabled: true },
|
||||
{ name: 'AP News', site: 'apnews.com', enabled: true },
|
||||
{ name: 'Bring Me The News', site: 'bringmethenews.com', enabled: true },
|
||||
];
|
||||
|
||||
const DEFAULT_NEWS_PROMPT = `You are a news curator for Sojorn, a social media platform. Your job is to write brief, engaging social media posts about news articles.
|
||||
|
|
@ -91,7 +91,8 @@ Rules:
|
|||
|
||||
interface NewsSource {
|
||||
name: string;
|
||||
rss_url: string;
|
||||
site?: string;
|
||||
rss_url?: string;
|
||||
enabled: boolean;
|
||||
}
|
||||
|
||||
|
|
@ -351,7 +352,8 @@ export default function OfficialAccountsPage() {
|
|||
<div key={i} className="flex items-center gap-2 text-xs">
|
||||
<span className={`w-2 h-2 rounded-full ${src.enabled ? 'bg-green-500' : 'bg-gray-300'}`} />
|
||||
<span className="font-medium">{src.name}</span>
|
||||
<a href={src.rss_url} target="_blank" className="text-brand-500 hover:underline flex items-center gap-1">
|
||||
<span className="text-gray-400 font-mono">{src.site || src.rss_url}</span>
|
||||
<a href={src.site ? `https://news.google.com/rss/search?q=site:${src.site}&hl=en-US&gl=US&ceid=US:en` : src.rss_url} target="_blank" className="text-brand-500 hover:underline flex items-center gap-1">
|
||||
RSS <ExternalLink className="w-3 h-3" />
|
||||
</a>
|
||||
</div>
|
||||
|
|
@ -489,14 +491,14 @@ function CreateAccountForm({ onDone, initialProfile }: { onDone: () => void; ini
|
|||
<input type="text" value={src.name} placeholder="Name"
|
||||
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], name: e.target.value }; setNewsSources(n); }}
|
||||
className="w-32 px-2 py-1 border border-warm-300 rounded text-sm" />
|
||||
<input type="text" value={src.rss_url} placeholder="RSS URL"
|
||||
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], rss_url: e.target.value }; setNewsSources(n); }}
|
||||
<input type="text" value={src.site || ''} placeholder="site domain (e.g. npr.org)"
|
||||
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], site: e.target.value }; setNewsSources(n); }}
|
||||
className="flex-1 px-2 py-1 border border-warm-300 rounded text-sm font-mono" />
|
||||
<button type="button" onClick={() => setNewsSources(newsSources.filter((_, j) => j !== i))}
|
||||
className="text-red-500 hover:text-red-700 text-sm">✕</button>
|
||||
</div>
|
||||
))}
|
||||
<button type="button" onClick={() => setNewsSources([...newsSources, { name: '', rss_url: '', enabled: true }])}
|
||||
<button type="button" onClick={() => setNewsSources([...newsSources, { name: '', site: '', enabled: true }])}
|
||||
className="text-sm text-brand-500 hover:text-brand-600 flex items-center gap-1 mt-1">
|
||||
<Plus className="w-3 h-3" /> Add Source
|
||||
</button>
|
||||
|
|
@ -601,14 +603,14 @@ function EditAccountForm({ config, onDone }: { config: Config; onDone: () => voi
|
|||
<input type="text" value={src.name}
|
||||
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], name: e.target.value }; setNewsSources(n); }}
|
||||
className="w-28 px-2 py-1 border border-warm-300 rounded text-xs" />
|
||||
<input type="text" value={src.rss_url}
|
||||
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], rss_url: e.target.value }; setNewsSources(n); }}
|
||||
<input type="text" value={src.site || ''}
|
||||
onChange={(e) => { const n = [...newsSources]; n[i] = { ...n[i], site: e.target.value }; setNewsSources(n); }}
|
||||
className="flex-1 px-2 py-1 border border-warm-300 rounded text-xs font-mono" />
|
||||
<button type="button" onClick={() => setNewsSources(newsSources.filter((_, j) => j !== i))}
|
||||
className="text-red-500 text-xs">✕</button>
|
||||
</div>
|
||||
))}
|
||||
<button type="button" onClick={() => setNewsSources([...newsSources, { name: '', rss_url: '', enabled: true }])}
|
||||
<button type="button" onClick={() => setNewsSources([...newsSources, { name: '', site: '', enabled: true }])}
|
||||
className="text-xs text-brand-500 hover:text-brand-600 flex items-center gap-1 mt-1">
|
||||
<Plus className="w-3 h-3" /> Add
|
||||
</button>
|
||||
|
|
|
|||
|
|
@ -43,13 +43,29 @@ type OfficialAccountConfig struct {
|
|||
AvatarURL string `json:"avatar_url,omitempty"`
|
||||
}
|
||||
|
||||
// NewsSource represents a single RSS feed configuration
|
||||
// NewsSource represents a single RSS feed configuration.
|
||||
// If Site is set, the Google News RSS URL is auto-constructed.
|
||||
// If RSSURL is set directly, it's used as-is (legacy/fallback).
|
||||
type NewsSource struct {
|
||||
Name string `json:"name"`
|
||||
RSSURL string `json:"rss_url"`
|
||||
Site string `json:"site,omitempty"`
|
||||
RSSURL string `json:"rss_url,omitempty"`
|
||||
Enabled bool `json:"enabled"`
|
||||
}
|
||||
|
||||
// GoogleNewsRSSURL builds a Google News RSS search URL restricted to the given
// site domain (for example "npr.org").
//
// The site value is normalized before it is placed in the query, because it is
// typically typed or pasted by hand: surrounding whitespace, a leading
// "http://" or "https://" scheme, and trailing slashes are removed. A plain
// domain passes through unchanged, so existing callers get the same URL as
// before.
func GoogleNewsRSSURL(site string) string {
	site = strings.TrimSpace(site)
	site = strings.TrimPrefix(site, "https://")
	site = strings.TrimPrefix(site, "http://")
	site = strings.TrimRight(site, "/")
	return fmt.Sprintf("https://news.google.com/rss/search?q=site:%s&hl=en-US&gl=US&ceid=US:en", site)
}
|
||||
|
||||
// EffectiveRSSURL returns the RSS URL to fetch — Google News if Site is set, otherwise RSSURL.
|
||||
func (ns *NewsSource) EffectiveRSSURL() string {
|
||||
if ns.Site != "" {
|
||||
return GoogleNewsRSSURL(ns.Site)
|
||||
}
|
||||
return ns.RSSURL
|
||||
}
|
||||
|
||||
// RSSFeed represents a parsed RSS feed
|
||||
type RSSFeed struct {
|
||||
Channel struct {
|
||||
|
|
@ -60,11 +76,18 @@ type RSSFeed struct {
|
|||
|
||||
// RSSItem represents a single RSS item
|
||||
type RSSItem struct {
|
||||
Title string `xml:"title"`
|
||||
Link string `xml:"link"`
|
||||
Description string `xml:"description"`
|
||||
PubDate string `xml:"pubDate"`
|
||||
GUID string `xml:"guid"`
|
||||
Title string `xml:"title" json:"title"`
|
||||
Link string `xml:"link" json:"link"`
|
||||
Description string `xml:"description" json:"description"`
|
||||
PubDate string `xml:"pubDate" json:"pub_date"`
|
||||
GUID string `xml:"guid" json:"guid"`
|
||||
Source RSSSource `xml:"source" json:"source"`
|
||||
}
|
||||
|
||||
// RSSSource models the <source> child element that Google News RSS items carry.
type RSSSource struct {
	// URL is read from the element's "url" attribute.
	URL string `xml:"url,attr" json:"url"`
	// Name is the element's character data (the text between the tags).
	Name string `xml:",chardata" json:"name"`
}
|
||||
|
||||
// PostedArticle represents a previously posted article
|
||||
|
|
@ -212,7 +235,7 @@ func (s *OfficialAccountsService) FetchRSS(ctx context.Context, rssURL string) (
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("User-Agent", "Sojorn/1.0 (News Aggregator)")
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Sojorn/1.0)")
|
||||
|
||||
resp, err := s.httpClient.Do(req)
|
||||
if err != nil {
|
||||
|
|
@ -234,9 +257,52 @@ func (s *OfficialAccountsService) FetchRSS(ctx context.Context, rssURL string) (
|
|||
return nil, fmt.Errorf("failed to parse RSS from %s: %w", rssURL, err)
|
||||
}
|
||||
|
||||
// If items come from Google News, resolve redirect links to actual article URLs
|
||||
isGoogleNews := strings.Contains(rssURL, "news.google.com/rss")
|
||||
if isGoogleNews {
|
||||
for i := range feed.Channel.Items {
|
||||
resolved := s.resolveGoogleNewsLink(feed.Channel.Items[i].Link)
|
||||
if resolved != "" {
|
||||
feed.Channel.Items[i].Link = resolved
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return feed.Channel.Items, nil
|
||||
}
|
||||
|
||||
// resolveGoogleNewsLink follows the Google News redirect to get the actual article URL.
|
||||
func (s *OfficialAccountsService) resolveGoogleNewsLink(googleURL string) string {
|
||||
if googleURL == "" || !strings.Contains(googleURL, "news.google.com") {
|
||||
return googleURL
|
||||
}
|
||||
|
||||
// Use a client that does NOT follow redirects
|
||||
noRedirectClient := &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
return http.ErrUseLastResponse
|
||||
},
|
||||
}
|
||||
|
||||
resp, err := noRedirectClient.Get(googleURL)
|
||||
if err != nil {
|
||||
log.Debug().Err(err).Str("url", googleURL).Msg("Failed to resolve Google News link")
|
||||
return googleURL
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 300 && resp.StatusCode < 400 {
|
||||
loc := resp.Header.Get("Location")
|
||||
if loc != "" {
|
||||
return loc
|
||||
}
|
||||
}
|
||||
|
||||
// Some Google News links use JS-based redirect; try parsing from body as fallback
|
||||
return googleURL
|
||||
}
|
||||
|
||||
// FetchNewArticles fetches new articles from all enabled news sources for a config,
|
||||
// filtering out already-posted articles.
|
||||
func (s *OfficialAccountsService) FetchNewArticles(ctx context.Context, configID string) ([]RSSItem, []string, error) {
|
||||
|
|
@ -254,10 +320,11 @@ func (s *OfficialAccountsService) FetchNewArticles(ctx context.Context, configID
|
|||
var sourceNames []string
|
||||
|
||||
for _, src := range sources {
|
||||
if !src.Enabled || src.RSSURL == "" {
|
||||
rssURL := src.EffectiveRSSURL()
|
||||
if !src.Enabled || rssURL == "" {
|
||||
continue
|
||||
}
|
||||
items, err := s.FetchRSS(ctx, src.RSSURL)
|
||||
items, err := s.FetchRSS(ctx, rssURL)
|
||||
if err != nil {
|
||||
log.Warn().Err(err).Str("source", src.Name).Msg("Failed to fetch RSS feed")
|
||||
continue
|
||||
|
|
|
|||
Loading…
Reference in a new issue