# html_parser.html

from HTMLParser import HTMLParser
import string, urllib, re, os, exceptions, webbrowser

# Selects which map URL xerox_parc_url() builds: a true value picks the
# variant containing "db=usa", a false value the variant without it.
JUST_THE_US = 0


# Labels printed, in order, for the twelve consecutive fields of one "AWS" row.
_AWS_FIELD_LABELS = (
    "found station ",
    "found date time ",
    "found temp ",
    "found dewpoint ",
    "found rel hum ",
    "found wind dir ",
    "found wind speed kph ",
    "found wind speed knots ",
    "found wind gusts kph ",
    "found wind gusts knots ",
    "found air pressure ",
    "rain since 9am ",
)

# Inclusive end offsets of the fifteen fixed-width columns of a "txtHrlyObs"
# line; each column starts right after the previous column's end offset.
_HOURLY_COLUMN_ENDS = (11, 16, 22, 25, 30, 34, 36, 45, 47, 50, 58, 66, 72, 75, 77)


def _fetch_page(url):
    """Return the body served at *url*, closing the connection in all cases."""
    from urllib import urlopen

    handle = urlopen(url)
    try:
        return handle.read()
    finally:
        handle.close()


def _collect_between(chunks, start, stop=None):
    """Return the chunks from *start* (inclusive) up to *stop* (exclusive).

    Collection can be switched on and off repeatedly.  Chunks equal to a
    single space are skipped.  With ``stop=None`` collection never ends.
    """
    collected = []
    collecting = False
    for chunk in chunks:
        if chunk == start:
            collecting = True
        # Checked after the start test, so identical delimiters collect nothing.
        if stop is not None and chunk == stop:
            collecting = False
        if collecting and chunk != " ":
            collected.append(chunk)
    return collected


def _split_fixed_width(line):
    """Cut *line* into the fifteen columns given by _HOURLY_COLUMN_ENDS.

    A column is left as "" when the line is too short to reach its last
    character.
    """
    columns = []
    begin = 0
    for end in _HOURLY_COLUMN_ENDS:
        columns.append(line[begin:end + 1] if len(line) > end else "")
        begin = end + 1
    return columns


def _parse_aws(page, delim1, delim2):
    """Print and return the table fields of an "AWS" HTML page."""
    # Collapse all whitespace runs to single spaces and turn empty centred
    # cells into "-" so that they still yield a text chunk.
    tokens = []
    for token in page.split():
        if token == 'align="center">&nbsp;</td>':
            token = 'align="center">-</td>'
        tokens.append(token)

    parser = MyHTMLParser()
    parser.feed(" ".join(tokens))

    fields = _collect_between(parser.dl, delim1, delim2)
    width = len(_AWS_FIELD_LABELS)
    for index, value in enumerate(fields):
        print("%s %s" % (_AWS_FIELD_LABELS[index % width], value))

    # Group the flat field list into rows of twelve (the last may be short).
    return [fields[i:i + width] for i in range(0, len(fields), width)]


def _parse_hourly_obs(page, delim1):
    """Return the fixed-width records of a "txtHrlyObs" page."""
    # Only the text between the last "<pre>" and the following "</pre>" is used.
    body = page.split("<pre>")[-1].split("</pre>")[0]
    lines = _collect_between(body.splitlines(True), delim1)

    records = []
    for line in lines:
        if line == delim1:
            continue
        columns = _split_fixed_width(line)
        # Lines shorter than the first column carry no record.
        if columns[0] != "":
            records.append(columns)
    return records


def download_and_parse(delim1, delim2, urlVar, typeOfData):
    """Download a page and extract its tabular data.

    Parameters:
        delim1     -- text chunk ("AWS") or complete line including its line
                      ending ("txtHrlyObs") at which extraction starts; the
                      delimiter itself is part of the extracted data for
                      "AWS" and skipped for "txtHrlyObs".
        delim2     -- text chunk at which extraction stops (not included).
                      Only used for "AWS".
        urlVar     -- URL of the page to download.
        typeOfData -- "AWS" for an HTML table page, "txtHrlyObs" for a page
                      holding fixed-width text inside <pre>...</pre>.

    For "AWS" every extracted field is printed with its label, twelve fields
    per row.  Nothing is printed for "txtHrlyObs".

    Returns a list of records, each a list of strings: rows of up to twelve
    fields for "AWS", fifteen fixed-width columns per line for "txtHrlyObs".
    Any other typeOfData still downloads the page but returns an empty list.
    """
    page = _fetch_page(urlVar)

    if typeOfData == "AWS":
        return _parse_aws(page, delim1, delim2)
    if typeOfData == 'txtHrlyObs':
        return _parse_hourly_obs(page, delim1)
    return []
                
class MyHTMLParser(HTMLParser):
    """HTML parser that records every run of character data it is fed.

    After feed(), the ``dl`` attribute of the instance holds the text chunks
    in document order.  Tags themselves are ignored.
    """

    # Empty class-level default so that ``MyHTMLParser.dl`` still resolves.
    # Parsed data goes to the per-instance list created in __init__, so
    # separate parsers no longer share (and accumulate) each other's text.
    dl = []

    def __init__(self, *args, **kwargs):
        # Explicit base-class call instead of super(): it works regardless of
        # whether HTMLParser is an old-style or a new-style class.
        HTMLParser.__init__(self, *args, **kwargs)
        self.dl = []

    def handle_starttag(self, tag, attrs):
        # Opening tags carry no text of interest.
        pass

    def handle_endtag(self, tag):
        # Closing tags carry no text of interest.
        pass

    def handle_data(self, data):
        # Keep each text chunk exactly as the parser delivers it.
        self.dl.append(data)

def xerox_parc_url(marklist):
    """Prepare a URL for the xerox.com map-drawing service.

    marklist is a non-empty sequence of (latitude, longitude) pairs.  The
    URL is centred on the average of all marks and lists every mark as
    "lat,lon", separated by ";".  Height and width are four times the largest
    distance, along either axis, between any mark and that centre (or 4 when
    all marks coincide).

    The module-level JUST_THE_US flag selects the "db=usa" URL variant.

    Raises IndexError when marklist is empty.
    """
    if not marklist:
        raise IndexError("marklist is empty; at least one (lat, lon) pair is required")

    # float() keeps the averages exact for integer input on Python 2 as well.
    count = float(len(marklist))
    avg_lat = sum(lat for lat, lon in marklist) / count
    avg_lon = sum(lon for lat, lon in marklist) / count

    # Use the farthest mark in any direction, so that marks south or west of
    # the centre widen the map just like marks north or east of it.
    diff = max(max(abs(lat - avg_lat), abs(lon - avg_lon))
               for lat, lon in marklist)
    if diff == 0:
        # Single (or coincident) marks would otherwise give a zero-size map.
        diff = 1

    # NOTE(review): the ";" separator follows the Map Viewer's
    # "mark=lat,lon;lat,lon" form as remembered - confirm against its docs.
    D = {'height': 4 * diff,
         'width': 4 * diff,
         'lat': avg_lat,
         'lon': avg_lon,
         'marks': ';'.join("%f,%f" % (lat, lon) for lat, lon in marklist)}

    if JUST_THE_US:
        url = ("http://pubweb.parc.xerox.com/map/db=usa/ht=%(height)f/wd=%(width)f/color=1/mark=%(marks)s/lat=%(lat)f/lon=%(lon)f/") % D
    else:
        url = ("http://pubweb.parc.xerox.com/map/color=1/ht=%(height)f/wd=%(width)f/color=1/mark=%(marks)s/lat=%(lat)f/lon=%(lon)f/") % D

    return url


class CityNotFound(Exception):
    """Raised by findcity() when no usable coordinates can be obtained."""


def findcity(city, state):
    """Look up the latitude and longitude of a city via www.astro.ch.

    The atlas page for "city, state" is downloaded.  The first line that
    follows (or is) the line containing "Please click" and mentions both
    *city* and *state* is split at its "<" and ">" characters; items 6 and 10
    of the non-empty pieces are taken as latitude and longitude.

    Returns a (latitude, longitude) tuple of floats in degrees, where "n" and
    "e" values are positive and "s" and "w" values are negative.

    Raises CityNotFound when no matching line exists, when the line has too
    few pieces, or when a coordinate cannot be interpreted.
    """
    query = urllib.quote_plus("%s, %s" % (city, state))
    url = "http://www.astro.ch/cgi-bin/atlw3/aq.cgi?expr=%s&lang=e" % query

    inf = urllib.FancyURLopener().open(url)
    try:
        lines = inf.readlines()
    finally:
        inf.close()

    lst = []
    found_please_click = False
    for line in lines:
        line = line.rstrip("\r\n")
        if "Please click" in line:
            found_please_click = True

        # Plain substring tests: city and state are names, not regex patterns.
        if found_please_click and city in line and state in line:
            pieces = []
            for part in line.split('<'):
                pieces.extend(part.split('>'))
            lst.append([piece for piece in pieces if piece])

    try:
        row = lst[0]
        lat, lon = row[6], row[10]
    except IndexError:
        raise CityNotFound("Not found: %s, %s" % (city, state))

    def getdegrees(x, dividers):
        """Convert "<deg><letter><min>" text into signed decimal degrees.

        dividers[0] marks a positive value, dividers[1] a negative one.
        """
        for divider, sign in ((dividers[0], 1), (dividers[1], -1)):
            if divider in x:
                try:
                    parts = [int(part) for part in x.split(divider)]
                except ValueError:
                    # Non-numeric pieces mean the page held something else.
                    raise CityNotFound("Bogus result (%s)" % x)
                return sign * (parts[0] + (parts[1] / 60.))
        raise CityNotFound("Bogus result (%s)" % x)

    return getdegrees(lat, "ns"), getdegrees(lon, "ew")

def showcities(citylist):
    """Look up every (city, state) pair in citylist and build a map URL.

    Each city is resolved with findcity() and its coordinates are printed.
    Cities for which findcity() raises CityNotFound are reported and skipped.

    Returns the URL produced by xerox_parc_url() for the cities that were
    found, or None when none could be located.  The URL is not opened.
    """
    marklist = []

    for city, state in citylist:
        try:
            lat, lon = findcity(city, state)
        except CityNotFound as message:
            print("%s, %s: not in database? (%s)" % (city, state, message))
        else:
            print("%s, %s: %s %s" % (city, state, lat, lon))
            marklist.append((lat, lon))

    # xerox_parc_url() cannot handle an empty list of marks.
    if not marklist:
        return None

    url = xerox_parc_url(marklist)
    ##webbrowser.open(url)
    return url
    
# Sample (city, state-or-country) pairs in the shape showcities() iterates over.

# Cities in the north-eastern United States.
citylist = (
    ("Natick", "MA"),
    ("Rhinebeck", "NY"),
    ("New Haven", "CT"),
    ("King of Prussia", "PA"),
)

# Mostly Australian cities, plus one from Texas.
citylist1 = (
    ("Melbourne", "Australia"),
    ("Bendigo", "Australia"),
    ("Walget", "Australia"),
    ("Abilene", "Texas"),
)

# European capitals and cities, plus Canberra.
citylist2 = (
    ("Munich", "Germany"),
    ("London", "England"),
    ("Madrid", "Spain"),
    ("Paris", "France"),
    ("Canberra", "Australia"),
)


if __name__ == '__main__':

    # Cities to locate once the observation pages have been processed.
    citylist3 = (
        ("Melbourne", "Australia"),
        ("Bendigo", "Australia"),
        ("Walget", "Australia"),
        ("Abilene", "Texas"),
    )

    # One entry per page: [label, start delimiter, stop delimiter, URL, data type].
    # Only the first entry is active; the rest are kept for reference.
    bomAus = [
        ["Tasmainia", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03024.txt", "txtHrlyObs"],
        #["Northern Territory", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03025.txt", "txtHrlyObs"],
        #["Western Australia", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03028.txt", "txtHrlyObs"],
        #["Queensland", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03026.txt", "txtHrlyObs"],
        #["New South Wales", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03022.txt", "txtHrlyObs"],
        #["Victoria", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03023.txt", "txtHrlyObs"],
        #["South Australia", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03027.txt", "txtHrlyObs"],
    ]

    # The label in position 0 is informational only and is not passed on.
    for _label, start_delim, stop_delim, page_url, data_type in bomAus:
        download_and_parse(start_delim, stop_delim, page_url, data_type)

    showcities(citylist3)

##    bomAusAWS = [["Tasmainia", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03024.txt", "txtHrlyObs"],
##                 ["Northern Territory", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03025.txt", "txtHrlyObs"],
##                 ["Western Australia", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03028.txt", "txtHrlyObs"],
##                 ["Queensland", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03026.txt", "txtHrlyObs"],
##                 ["New South Wales", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03022.txt", "txtHrlyObs"],
##                 ["Victoria", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03023.txt", "txtHrlyObs"],
##                 ["South Australia", "+------------------------------------- start ----------------------------------+\n", "", "http://www.bom.gov.au/cgi-bin/wrap_fwo.pl?IDY03027.txt", "txtHrlyObs"],
##                 ["NSW", "Albury Airport", "NOTE:", "http://www.bom.gov.au/products/IDN65091.shtml", "AWS"],
##                 ["VIC", "Aireys Inlet", " Automatic Weather Station Location Maps", "http://www.bom.gov.au/products/IDV65119.shtml", "AWS"]
##                 ]