#!/usr/bin/python
"""
This is meant to be called from cron.

Fetch an RSS feed, and dump its channel, image and item fields to disk
as "Key = value" lines, one field per line.
"""

import sys
import os
import getopt
import string

from xmllib import XMLParser
from VirtualBrowser import VirtualBrowser
from timeoutsocket import Timeout
from log import *


class RSSParser(XMLParser):
    """Collect channel, image and item fields from an RSS document.

    Feed the document text with feed() and finish with close(); the
    results are then available as attributes:

      title, description, link          -- channel fields
      image, image_alt, image_link,
      image_width, image_height         -- channel image fields
      items                             -- list of (title, link, description)

    Only the "channel", "item" and "image" elements are tracked as
    containers; the text of a child element is stored according to the
    innermost container that is currently open.
    """

    def __init__(self, verbose=0):
        # NOTE: verbose is accepted for call compatibility; it is not used.
        self.data = ""
        XMLParser.__init__(self)

        # Stack of the container tags currently open.
        self.state = []

        # Channel-level fields.
        self.title = ""
        self.description = ""
        self.link = ""

        # Image fields.
        self.image = ""
        self.image_alt = ""
        self.image_width = 0
        self.image_height = 0
        self.image_link = ""

        # Fields of the item being read, and the finished items.
        self.item_title = ""
        self.item_link = ""
        self.item_desc = ""
        self.items = []

    def handle_data(self, data):
        """Accumulate character data of the element being read."""
        self.data = self.data + data

    def unknown_starttag(self, tag, attrs):
        """Push container tags on the state stack and restart text capture."""
        if tag in ["channel", "item", "image"]:
            self.state.append(tag)
        self.data = ""

    def unknown_endtag(self, tag):
        """Store the text gathered for `tag` in the field it belongs to.

        End tags seen while no container is open are ignored; previously
        they raised IndexError on the empty state stack.
        """
        if not self.state:
            self.data = ""
            return

        context = self.state[-1]
        text = self.data

        if context == "image":
            if tag == "title":
                self.image_alt = text
            elif tag == "url":
                self.image = text
            elif tag == "link":
                self.image_link = text
            elif tag == "width":
                self.image_width = int(text)
            elif tag == "height":
                self.image_height = int(text)
            elif tag == "image":
                self.state.pop()
        elif context == "item":
            if tag == "title":
                self.item_title = text
            elif tag == "link":
                self.item_link = text
            elif tag == "description":
                self.item_desc = text
            elif tag == "item":
                # Item complete: record it and clear the scratch fields so
                # a missing child in the next item does not inherit a value.
                self.items.append(
                    (self.item_title, self.item_link, self.item_desc))
                self.item_title = ""
                self.item_link = ""
                self.item_desc = ""
                self.state.pop()
        elif context == "channel":
            # NOTE(review): "channel" is pushed but never popped, so these
            # branches also apply to title/description/link end tags that
            # follow </channel>.
            if tag == "title":
                self.title = text
            elif tag == "description":
                self.description = text
            elif tag == "link":
                self.link = text

        self.data = ""


def usage(argv0):
    """Print the command-line synopsis for the script named argv0."""
    # Single-argument parenthesised form, so it prints the same text when
    # parsed as a print statement.
    print("%s [--help] <url> <file>" % argv0)


def main(argv, environ):
    """Fetch the feed at argv's URL and write its fields to argv's file.

    argv    -- full argument vector: program name, options, then the feed
               URL and the output file name.
    environ -- process environment; not used.

    Prints the usage text and returns when --help is given or when fewer
    than two positional arguments are supplied.  A non-200 fetch result or
    a Timeout is reported through log() and no file is written.
    """
    alist, args = getopt.getopt(argv[1:], "", ["help"])
    for (field, val) in alist:
        if field == "--help":
            usage(argv[0])
            return

    if len(args) < 2:
        usage(argv[0])
        return

    url = args[0]
    out_path = args[1]

    vb = VirtualBrowser()
    vb.timeout_(10)  # presumably seconds -- confirm against VirtualBrowser
    try:
        errcode, errmsg, page, headers = vb.fetchpage(url)
        if errcode != 200:
            log("Unable to fetch page %s: %s %s" % (url, errcode, errmsg))
        else:
            parser = RSSParser()
            parser.feed(page)
            parser.close()

            out = [
                "Channel.Title = %s" % parser.title,
                "Channel.Description = %s" % parser.description,
                "Channel.Link = %s" % parser.link,
                "Channel.Image.Title = %s" % parser.image_alt,
                "Channel.Image.SRC = %s" % parser.image,
                "Channel.Image.Link = %s" % parser.image_link,
                "Channel.Image.Width = %s" % parser.image_width,
                "Channel.Image.Height = %s" % parser.image_height,
            ]
            for idx in range(len(parser.items)):
                (title, link, desc) = parser.items[idx]
                out.append("Channel.Items.%d.Title = %s" % (idx, title))
                out.append("Channel.Items.%d.Link = %s" % (idx, link))
                out.append("Channel.Items.%d.Description = %s" % (idx, desc))

            # Close the file explicitly rather than relying on garbage
            # collection to flush and release it.
            fp = open(out_path, "w")
            try:
                fp.write("\n".join(out))
            finally:
                fp.close()
    except Timeout:
        log("Unable to fetch page %s: Timeout" % url)


if __name__ == "__main__":
    main(sys.argv, os.environ)