1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import feedparser, urllib2, socket
from hashlib import md5
def getRssDataAsDict(self, url, username=None, password=None):
    """Fetch and parse the feed at *url* and return its content as a dict.

    Parameters:
      self     -- calling context; not used by this function.
      url      -- address of the feed. '', None and the string 'None' all
                  mean "no feed configured".
      username -- optional HTTP basic-auth user name.
      password -- optional HTTP basic-auth password. Authentication is only
                  set up when both username and password are given.

    Returns a dict that always contains a 'status' key:
       0 -- success; the dict also holds 'title', 'link', 'items' and,
            when the feed provides an image URL, 'logo'
      -1 -- no URL was given
      -2 -- the URL could not be opened (urllib2.URLError)
      -3 -- the server answered 401
      -4 -- the server answered 404
      -5 -- the feed could not be parsed properly

    On success 'items' is a list of dicts with the keys 'title', 'link',
    'other_links', 'md5', 'content', 'date', 'img' and 'updated_parsed',
    ordered from newest to oldest; entries without a parsed date come last.
    """
    # No URL, no feed to read.
    if url in ('', None, 'None'):
        return {'status': -1}

    # Use HTTP basic authentication only when full credentials are given.
    handlers = []
    if username is not None and password is not None:
        password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(None, url, username, password)
        handlers.append(urllib2.HTTPBasicAuthHandler(password_manager))

    # Use a 60 second socket timeout for the fetch and restore the previous
    # default afterwards, even if parsing raises.
    # NOTE: the default socket timeout is process-wide, so other code that
    # opens sockets while the fetch runs sees this value as well.
    previous_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(60.0)
    try:
        d = feedparser.parse(url, handlers=handlers)
    finally:
        socket.setdefaulttimeout(previous_timeout)

    if d.bozo and isinstance(d.bozo_exception, urllib2.URLError):
        # The URL could not be opened at all.
        return {'status': -2}

    # Look at the HTTP status before the generic parse-error check: an error
    # page is usually not a well-formed feed, so it would otherwise always be
    # reported as -5. 'status' is absent when the feed was not fetched over
    # HTTP, hence .get().
    http_status = d.get('status')
    if http_status == 401:
        return {'status': -3}
    if http_status == 404:
        return {'status': -4}

    # Some bozo exceptions can be ignored; any other one means a broken feed.
    ignorable_exceptions = (feedparser.CharacterEncodingOverride,)
    if d.bozo and not isinstance(d.bozo_exception, ignorable_exceptions):
        return {'status': -5}

    result = {'items': []}

    # Some feeds may not provide a logo.
    image = d.feed.get('image', None)
    if image is not None:
        logo = image.get('href')
        if logo is not None:
            result['logo'] = logo
    result['title'] = d.feed.get('title', '')
    result['link'] = d.feed.get('link', '')

    for entry in d.entries:
        link = entry.get('link', '')
        # md5 needs bytes; encode explicitly so that links containing
        # non-ASCII characters do not raise UnicodeEncodeError.
        if isinstance(link, bytes):
            link_bytes = link
        else:
            link_bytes = link.encode('utf-8')
        result['items'].append({
            'title': entry.get('title', ''),
            'link': link,
            'other_links': [
                x['href'] for x in entry.get('links', []) if 'href' in x
            ],
            'md5': md5(link_bytes).hexdigest(),
            'content': entry.get('summary', ''),
            'date': entry.get('updated', None),
            'img': [
                x['href'] for x in entry.get('enclosures', []) if 'href' in x
            ],
            'updated_parsed': entry.get('updated_parsed', None),
        })

    # Sort by date, newest first. The key places entries without a parsed
    # date before all dated ones, so after reversing they end up last, and
    # None is never compared against a date.
    result['items'].sort(
        key=lambda item: (
            item['updated_parsed'] is not None,
            item['updated_parsed'] or (),
        )
    )
    result['items'].reverse()
    result['status'] = 0
    return result