#!/usr/bin/python

""" This script searches identifiers of files uploaded on archive.org with
a specific email address (defined below), then creates an RSS feed with the
information it found.

written by mihi (mihi@lo-res.org) in 2009 for
http://anotherworldispossible.soup.io.
I AM NOT RESPONSIBLE FOR ANYTHING THIS SCRIPT DOES OR DOES NOT! """

import xml.dom.minidom
import urllib,os,re,sys

"""--------------------------------------
Configuration follows:
-----------------------------------------"""

""" channel contains information for the channel, this should all be valid
elements in rss 2.0"""
channel=({"title":"Another World Is Possible", 
"description":"A revolutionary anarchist podcast originating from Boston, MA.",
"language":"en-us", "link":"http://wake.lo-res.org/~mihi/a2r.rss",
"ttl":"15","managingEditor":"another.world.is.possible.podcast@gmail.com"})

image=({"url":"http://anotherworldispossible.podbean.com/mf/web/gg47ph/Podcast_Album_cover.jpg",
"title":"Another World Is Possible",
"link":"http://anotheworldispossible.soup.io"})

""" email containes the email address, searched for as uploader on
archive.org """
email="another.world.is.possible.podcast@gmail.com"

""" the cache directory, better make sure this directory exists. All items
will be cached there. simply delete them if you want to purge them """
cache=os.path.expanduser("~/.a2r/cache/")

"""-------------------------------------
End of Configuration
-------------------------------------"""


def getresultnumber(email):
	""" Returns the number of results for the uploader search.

	Queries the archive.org advanced search for items whose uploader is
	`email`, asking for zero rows so that only the result header is
	requested.

	email -- uploader address; inserted into the query URL as given.

	Returns the value of the "numFound" attribute of the first <result>
	element, as a string. Raises IndexError when the response contains no
	<result> element; errors from urllib.urlopen and from the XML parser
	propagate to the caller.
	"""
	url=("http://www.archive.org/advancedsearch.php?q=uploader%3A%28"+email+
	"%29&rows=0&fmt=xml&xmlsearch=Search")
	response=urllib.urlopen(url)
	try:
		# Parse inside try/finally so the connection is released even when
		# the response is not well-formed XML.
		searchdoc=xml.dom.minidom.parse(response)
	finally:
		response.close()
	results=searchdoc.documentElement.getElementsByTagName("result")
	return results[0].getAttribute("numFound")
	
def getfilename(identifier,format):
	""" Gets the filename for a specific format of an identifier.

	Downloads <identifier>_files.xml from archive.org and returns the
	"name" attribute of the first <file> element whose <format> text
	matches `format`.

	identifier -- archive.org item identifier.
	format     -- regular expression; it is applied with re.search, so a
	              match anywhere in the format text is enough (e.g. "MP3").

	Returns the file name, or None when no file matches. <file> entries
	without a <format> element, or with an empty one, are skipped. Errors
	from urllib.urlopen, the XML parser and re.compile propagate.
	"""
	# Compile once; the pattern is the same for every <file> entry.
	pattern=re.compile(format)
	response=urllib.urlopen(
	"http://www.archive.org/download/"+identifier+"/"+
	identifier+"_files.xml")
	try:
		files=xml.dom.minidom.parse(response)
	finally:
		response.close()
	for entry in files.documentElement.getElementsByTagName("file"):
		formats=entry.getElementsByTagName("format")
		# An entry without format text cannot match; skipping it keeps one
		# odd entry from hiding a matching file further down the list.
		if not formats or formats[0].firstChild is None:
			continue
		if pattern.search(formats[0].firstChild.data):
			return entry.getAttribute("name")
	return None
	
def makeitem(feed,chan,identifier):
	""" Makes an item in a feed, from an identifier.

	Downloads <identifier>_meta.xml from archive.org and builds an RSS
	<item> from it: a guid, the metadata elements listed in meta2rss
	(renamed to their RSS tags), an MP3 enclosure, a download link appended
	to the description, and a link. The item is appended to `chan` and its
	XML is written to the cache directory in a file named after the
	identifier.

	feed       -- the minidom Document used to create the new nodes.
	chan       -- the <channel> element the item is appended to.
	identifier -- archive.org item identifier.

	Returns 0 when the item has no MP3 file (nothing is added), otherwise
	None. This is best effort: a failure while building the item is
	reported on stderr and the item is left out of the feed; a failure to
	write the cache file is reported on stderr but keeps the item.
	"""
	base="http://www.archive.org/download/"+identifier+"/"
	details="http://www.archive.org/details/"+identifier
	try:
		response=urllib.urlopen(base+identifier+"_meta.xml")
		try:
			metadata=xml.dom.minidom.parse(response)
		finally:
			response.close()
		rssitem=feed.createElement("item")
		guid=rssitem.appendChild(feed.createElement("guid"))
		guid.appendChild(feed.createTextNode(details))
		guid.setAttribute("isPermaLink","true")
		for metatag in meta2rss:
			# Move the metadata element into the item, then rename it to
			# its RSS tag.
			nc=rssitem.appendChild(
			metadata.documentElement.getElementsByTagName(metatag)[0])
			nc.tagName=meta2rss[metatag]
		filename=getfilename(identifier,"MP3")
		if not filename:
			# Without an MP3 there is nothing to enclose; skip the item.
			return 0
		# File names may contain spaces, quotes and other characters that
		# are not valid in a URL (a quote would also end the href below),
		# so percent-encode the name before building the link.
		mp3url=base+urllib.quote(filename.encode("utf-8"))
		enc=rssitem.appendChild(feed.createElement("enclosure"))
		enc.setAttribute("url",mp3url)
		enc.setAttribute("type","audio/mpeg")
		# Append a download link to the description.
		dl=("<br/><a class='download' href='"+mp3url+
		"' target='_new'>download</a>")
		description=rssitem.getElementsByTagName("description")[0]
		if description.firstChild is None:
			# An empty description has no text node to extend.
			description.appendChild(feed.createTextNode(dl))
		else:
			description.firstChild.data+=dl
		link=rssitem.appendChild(feed.createElement("link"))
		link.appendChild(feed.createTextNode(details))
		chan.appendChild(rssitem)
	except Exception as err:
		# Exception (not a bare except) so that Ctrl-C and sys.exit still
		# stop the script.
		sys.stderr.write("a2r: could not build item for %s: %r\n"
		%(identifier,err))
		return None
	try:
		# toxml("utf-8") yields encoded bytes, hence binary mode.
		with open(cache+identifier,"wb") as pf:
			pf.write(rssitem.toxml("utf-8"))
	except Exception as err:
		# The item is already part of the feed; only the cache is missing.
		sys.stderr.write("a2r: could not cache item %s: %r\n"
		%(identifier,err))
	


# Maps element names found in an item's _meta.xml to the RSS element names
# makeitem renames them to.
# NOTE(review): RSS 2.0 spells the date element "pubDate" and expects an
# RFC 822 date, while the value here is copied unconverted. "pubdate" is
# left as is because already cached items use it too -- confirm before
# changing.
meta2rss={"title":"title","description":"description","publicdate":"pubdate"}

# Fail before any network traffic when the output file is missing, instead
# of with an IndexError after everything has been downloaded.
if len(sys.argv)<2:
	sys.exit("usage: "+sys.argv[0]+" OUTPUT.rss")

# Build the <rss><channel> skeleton from the configuration.
impl=xml.dom.minidom.getDOMImplementation()
rss=impl.createDocument(None, "rss", None)
rssde=rss.documentElement
rssde.setAttribute("version","2.0")
rsschan=rssde.appendChild(rss.createElement("channel"))
for name,value in channel.items():
	rssadd=rsschan.appendChild(rss.createElement(name))
	rssadd.appendChild(rss.createTextNode(value))
rsschanimage=rsschan.appendChild(rss.createElement("image"))
for name,value in image.items():
	rssadd=rsschanimage.appendChild(rss.createElement(name))
	rssadd.appendChild(rss.createTextNode(value))

# Ask for all identifiers of the uploader in one page, newest first.
rows=getresultnumber(email)
response=urllib.urlopen(
"http://www.archive.org/advancedsearch.php?q=uploader%3A%28"+email+
"%29&fl[]=identifier&sort[]=publicdate+desc&sort[]=&sort[]=&rows="+rows+
"&page=1&fmt=xml&xmlsearch=Search")
try:
	archive=xml.dom.minidom.parse(response)
finally:
	response.close()
docs=archive.documentElement.getElementsByTagName("doc")
for doc in docs:
	identifier=doc.getElementsByTagName("str")[0].firstChild.data
	try:
		rssitem=xml.dom.minidom.parse(cache+identifier)
	except Exception:
		# Not cached yet, or the cache file cannot be read or parsed:
		# build the item from archive.org. Exception (not a bare except)
		# so that Ctrl-C still stops the script.
		makeitem(rss,rsschan,identifier)
	else:
		rsschan.appendChild(rssitem.documentElement)

# toprettyxml(encoding=...) yields encoded bytes, hence binary mode.
with open(sys.argv[1],"wb") as f:
	f.write(rss.toprettyxml(encoding="utf-8"))
