#!/usr/bin/env python

import psycopg2
import os
import sys
import re
import xml.etree.ElementTree as ElementTree

# Rebuild the Rfcs table from scratch: every <reference> entry found in the
# bibliographic XML index becomes one row (num, title, body), where the body
# is read from the matching file in the local RFC mirror. The whole reload
# runs in a single transaction so a failure leaves the previous content
# of the table untouched.
connection = psycopg2.connect("dbname=rfcs")
cursor = connection.cursor()
# Left disabled, as in the original script.
# NOTE(review): confirm whether the RFC text needs an explicit client encoding.
#cursor.execute("SET client_encoding TO 'iso-8859-1'");
insertion = "INSERT INTO Rfcs (num, title, body) VALUES (%(num)s, %(title)s, %(body)s)"
# Anchors look like "RFC0008"; the capture group is the number without
# leading zeroes. Compiled once because it is used for every reference.
anchor_pattern = re.compile("^RFC0*([0-9]+)$")
try:
    # No explicit BEGIN: with psycopg2's default (non-autocommit) mode a
    # transaction is opened implicitly by the first execute().
    cursor.execute("DELETE FROM Rfcs")
    tree = ElementTree.parse("rfc-xml/bibxml/index-expansed.xml")
    rfcindex = tree.getroot()
    for rfc in rfcindex.findall("./back/references/reference"):
        fullnum = rfc.attrib["anchor"]
        match = anchor_pattern.search(fullnum)
        if not match:
            raise Exception("Wrong RFC number %s" % fullnum)
        num = int(match.group(1))
        title = rfc.findtext("./front/title")
        filename = "rfc-mirror/rfc%i.txt" % num
        try:
            # The context manager closes the file right away instead of
            # leaving one open handle per RFC until garbage collection.
            with open(filename) as rfc_file:
                body = rfc_file.read()
        except IOError:
            # Some RFC (for instance RFC8) are not available
            # sys.stderr.write behaves the same on Python 2 and Python 3,
            # unlike the "print >>" statement form.
            sys.stderr.write("Warning: no text for %s\n" % fullnum)
            continue
        cursor.execute(insertion, {'num': num, 'body': body, 'title': title})
    connection.commit()
except Exception:
    # Undo the DELETE and any partial inserts, then let the error propagate
    # so the script still fails loudly.
    connection.rollback()
    raise
finally:
    cursor.close()
    connection.close()

