@@ -10,6 +10,15 @@ import (
1010 "time"
1111)
1212
// FeedType names the syndication format a Feed was produced from.
type FeedType string

// Feed formats recognised by this package.
const (
	// TypeRSS is the FeedType value for RSS feeds.
	TypeRSS = FeedType("rss")
	// TypeAtom is the FeedType value for Atom feeds.
	TypeAtom = FeedType("atom")
)
20+
21+ // FeedItem 代表一个订阅项目
1322type FeedItem struct {
1423 ID string
1524 Title string
@@ -18,163 +27,185 @@ type FeedItem struct {
1827 PubDate time.Time
1928}
2029
30+ // Feed 代表一个订阅源
2131type Feed struct {
22- Type string
32+ Type FeedType
2333 Title string
2434 Link string
2535 Items []* FeedItem
2636}
2737
// timeLayouts lists the layouts parseTime tries, in order. The first layout
// that parses the input wins, so new layouts are appended rather than
// inserted: anything that parsed before keeps parsing to the same value.
var timeLayouts = []string{
	time.RFC3339,
	time.RFC1123,
	time.RFC1123Z,
	"Mon, 02 Jan 2006 15:04:05 GMT",
	"Monday, 02 Jan 2006 15:04:05 -07:00",
	// ISO 8601 with a literal trailing Z.
	"2006-01-02T15:04:05Z",
	// ISO 8601 without any zone designator.
	"2006-01-02T15:04:05",
	// RFC 822 style with a numeric zone.
	"Mon, 02 Jan 2006 15:04:05 -0700",
	// Same, without the day of week.
	"02 Jan 2006 15:04:05 -0700",
	// Date only.
	"2006-01-02",
	// Variants with an unpadded day of month ("2 Jan"). The "02" verb only
	// matches two digits, while "2" matches one or two, so these catch the
	// RFC 822 dates many RSS feeds emit for days 1-9.
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"Mon, 2 Jan 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 -0700",
	"2 Jan 2006 15:04:05 MST",
}

// parseTime parses str using each entry of timeLayouts in turn and returns
// the first successful result. Leading and trailing whitespace is ignored.
//
// On failure (empty input, or no layout matches) it returns a non-nil error
// together with the current time rather than the zero time: callers in this
// file discard the error and use the returned value as a fallback date.
func parseTime(str string) (time.Time, error) {
	str = strings.TrimSpace(str)
	if str == "" {
		return time.Now(), errors.New("empty time string")
	}

	for _, layout := range timeLayouts {
		if t, err := time.Parse(layout, str); err == nil {
			return t, nil
		}
	}

	return time.Now(), fmt.Errorf("could not parse time: %s", str)
}
5872
59- // ParseFeed auto-detects the feed type (RSS or Atom) and returns a generic Feed.
60- func ParseFeed (data []byte ) (feed * Feed , err error ) {
61- // Try to parse as RSS first
62- rssFeed , rssErr := ParseRss (data )
63- if rssErr == nil && rssFeed != nil {
64- // It's an RSS feed
65- feed = & Feed {
66- Type : "rss" ,
67- Link : rssFeed .Link ,
68- Title : rssFeed .Title ,
69- Items : make ([]* FeedItem , 0 , len (rssFeed .Items )),
70- }
73+ // ParseFeed 自动检测订阅源类型(RSS或Atom)并返回通用Feed
74+ func ParseFeed (data []byte ) (* Feed , error ) {
75+ // 清理数据 - 如果存在BOM则去除
76+ data = bytes .TrimPrefix (data , []byte {0xEF , 0xBB , 0xBF })
77+
78+ // 尝试解析为RSS
79+ if feed , err := parseAsRSS (data ); err == nil && feed != nil {
80+ return feed , nil
81+ }
7182
72- // Convert RSS items to generic feed items
73- for _ , item := range rssFeed .Items {
74- // Parse the publication date
75- pubDate , parseErr := parseTime (item .PubDate )
76- if parseErr != nil {
77- // Use current time if parsing fails, but continue
78- pubDate = time .Now ()
79- }
80-
81- feedItem := & FeedItem {
82- ID : item .ID (),
83- Title : cleanContent (item .Title ),
84- Link : item .Link ,
85- Description : cleanContent (item .GetContent ()),
86- PubDate : pubDate ,
87- }
88- feed .Items = append (feed .Items , feedItem )
83+ // 尝试解析为Atom
84+ if feed , err := parseAsAtom (data ); err == nil && feed != nil {
85+ return feed , nil
86+ }
87+
88+ return nil , errors .New ("failed to parse feed: not a valid RSS or Atom format" )
89+ }
90+
91+ // parseAsRSS 尝试将数据解析为RSS格式
92+ func parseAsRSS (data []byte ) (* Feed , error ) {
93+ rssFeed , err := ParseRss (data )
94+ if err != nil || rssFeed == nil {
95+ return nil , err
96+ }
97+
98+ feed := & Feed {
99+ Type : TypeRSS ,
100+ Link : rssFeed .Link ,
101+ Title : rssFeed .Title ,
102+ Items : make ([]* FeedItem , 0 , len (rssFeed .Items )),
103+ }
104+
105+ // 将RSS项目转换为通用订阅项目
106+ for _ , item := range rssFeed .Items {
107+ pubDate , _ := parseTime (item .PubDate ) // 忽略错误,使用返回的时间
108+
109+ feedItem := & FeedItem {
110+ ID : item .ID (),
111+ Title : cleanContent (item .Title ),
112+ Link : item .Link ,
113+ Description : cleanContent (item .GetContent ()),
114+ PubDate : pubDate ,
89115 }
90- return
91- }
92-
93- // Try to parse as Atom
94- atomFeed , atomErr := ParseAtom (data )
95- if atomErr == nil && atomFeed != nil {
96- // It's an Atom feed
97- feed = & Feed {
98- Type : "atom" ,
99- Link : atomFeed .Links [0 ].Href ,
100- Title : atomFeed .Title .Data ,
101- Items : make ([]* FeedItem , 0 , len (atomFeed .Entries )),
116+ feed .Items = append (feed .Items , feedItem )
117+ }
118+
119+ return feed , nil
120+ }
121+
122+ // parseAsAtom 尝试将数据解析为Atom格式
123+ func parseAsAtom (data []byte ) (* Feed , error ) {
124+ atomFeed , err := ParseAtom (data )
125+ if err != nil || atomFeed == nil {
126+ return nil , err
127+ }
128+
129+ // 确保links不为空
130+ if len (atomFeed .Links ) == 0 {
131+ return nil , errors .New ("atom feed has no links" )
132+ }
133+
134+ feed := & Feed {
135+ Type : TypeAtom ,
136+ Link : atomFeed .Links [0 ].Href ,
137+ Title : atomFeed .Title .Data ,
138+ Items : make ([]* FeedItem , 0 , len (atomFeed .Entries )),
139+ }
140+
141+ // 将Atom条目转换为通用订阅项目
142+ for _ , entry := range atomFeed .Entries {
143+ // 查找链接
144+ link := findBestLink (entry .Links )
145+
146+ // 获取发布日期(优先使用Published,备用Updated)
147+ pubDateStr := entry .Published
148+ if pubDateStr == "" {
149+ pubDateStr = entry .Updated
102150 }
103151
104- // Convert Atom entries to generic feed items
105- for _ , entry := range atomFeed .Entries {
106- var link string
107- // Find the first link or the link with rel="alternate"
108- for _ , l := range entry .Links {
109- if link == "" || l .Rel == "alternate" {
110- link = l .Href
111- }
112- if l .Rel == "alternate" {
113- break
114- }
115- }
116-
117- // Get publication date (prefer Published, fallback to Updated)
118- pubDateStr := entry .Published
119- if pubDateStr == "" {
120- pubDateStr = entry .Updated
121- }
122-
123- // Parse the publication date
124- pubDate , parseErr := parseTime (pubDateStr )
125- if parseErr != nil {
126- // Use current time if parsing fails, but continue
127- pubDate = time .Now ()
128- }
129-
130- feedItem := & FeedItem {
131- ID : entry .ID ,
132- Title : cleanContent (entry .Title .Data ),
133- Link : link ,
134- Description : cleanContent (entry .GetContent ()),
135- PubDate : pubDate ,
136- }
137- feed .Items = append (feed .Items , feedItem )
152+ pubDate , _ := parseTime (pubDateStr ) // 忽略错误,使用返回的时间
153+
154+ feedItem := & FeedItem {
155+ ID : entry .ID ,
156+ Title : cleanContent (entry .Title .Data ),
157+ Link : link ,
158+ Description : cleanContent (entry .GetContent ()),
159+ PubDate : pubDate ,
138160 }
139- return
161+ feed .Items = append (feed .Items , feedItem )
162+ }
163+
164+ return feed , nil
165+ }
166+
167+ // findBestLink 从链接列表中找出最佳链接
168+ func findBestLink (links []AtomLink ) string {
169+ if len (links ) == 0 {
170+ return ""
140171 }
141172
142- // Neither RSS nor Atom parsing succeeded
143- if rssErr != nil && atomErr != nil {
144- return nil , errors .New ("failed to parse feed: not a valid RSS or Atom format" )
173+ // 首先尝试找到rel="alternate"的链接
174+ for _ , l := range links {
175+ if l .Rel == "alternate" {
176+ return l .Href
177+ }
145178 }
146179
147- return nil , errors .New ("unknown feed format" )
180+ // 如果没有,返回第一个链接
181+ return links [0 ].Href
148182}
149183
150- // FetchFeed downloads a feed from the given URL and parses it.
151- func FetchFeed (url string ) (feed * Feed , err error ) {
152- // Create HTTP client with timeout
184+ // FetchFeed 从给定URL下载订阅源并解析它
185+ func FetchFeed (url string ) (* Feed , error ) {
186+ // 创建带超时的HTTP客户端
153187 client := & http.Client {
154188 Timeout : 30 * time .Second ,
155189 }
156190
157- // Send GET request
191+ // 发送GET请求
158192 resp , err := client .Get (url )
159193 if err != nil {
160- return nil , err
194+ return nil , fmt . Errorf ( "failed to fetch feed: %w" , err )
161195 }
162196 defer resp .Body .Close ()
163197
164- // Check status code
198+ // 检查状态码
165199 if resp .StatusCode != http .StatusOK {
166- return nil , errors . New ("failed to fetch feed: " + resp .Status )
200+ return nil , fmt . Errorf ("failed to fetch feed: HTTP %s" , resp .Status )
167201 }
168202
169- // Read response body
203+ // 读取响应体
170204 data , err := io .ReadAll (resp .Body )
171205 if err != nil {
172- return nil , err
206+ return nil , fmt . Errorf ( "failed to read response: %w" , err )
173207 }
174208
175- // Clean data - trim BOM if present
176- data = bytes .TrimPrefix (data , []byte {0xEF , 0xBB , 0xBF })
177-
178- // Parse the feed
209+ // 解析订阅源
179210 return ParseFeed (data )
180211}
0 commit comments