137 lines
4.0 KiB
Python
137 lines
4.0 KiB
Python
"""
Example script that scrapes data from the IEM ASOS download service
"""
|
|
from __future__ import print_function
|
|
import json
|
|
import time
|
|
import datetime
|
|
|
|
# Python 2 and 3: alternative 4
|
|
try:
|
|
from urllib.request import urlopen
|
|
except ImportError:
|
|
from urllib2 import urlopen
|
|
|
|
# Maximum number of times a single URL is tried before giving up
MAX_ATTEMPTS = 6
# Base URL of the IEM ASOS request service; query parameters get appended.
# HTTPS here can be problematic for installs that don't have the
# Let's Encrypt CA, hence plain HTTP.
SERVICE = "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"
|
|
|
|
|
|
def download_data(uri):
    """Fetch the data from the IEM

    The IEM download service has some protections in place to keep the number
    of inbound requests in check.  This function implements an exponential
    backoff to keep individual downloads from erroring.

    Args:
      uri (string): URL to fetch

    Returns:
      string data, or an empty string when all MAX_ATTEMPTS attempts failed
    """
    for attempt in range(MAX_ATTEMPTS):
        # Deliberately broad: any failure of a single attempt (network error,
        # timeout, bad encoding) should lead to a retry, not a crash.
        try:
            response = urlopen(uri, timeout=300)
            try:
                data = response.read().decode("utf-8")
            finally:
                # Explicit close so the connection is released on every path
                response.close()
            if not data.startswith("ERROR"):
                return data
            print("download_data(%s) got an ERROR response" % (uri,))
        except Exception as exp:
            print("download_data(%s) failed with %s" % (uri, exp))
        # Exponential backoff (5, 10, 20, ... seconds); there is no point in
        # waiting once the last attempt has been used up.
        if attempt + 1 < MAX_ATTEMPTS:
            time.sleep(5 * 2 ** attempt)

    print("Exhausted attempts to download, returning empty data")
    return ""
|
|
|
|
|
|
def get_stations_from_filelist(filename):
    """Build a listing of stations from a simple file listing the stations.

    The file should simply have one station per line.  Whitespace around each
    entry is stripped and blank lines are ignored.

    Args:
      filename (string): path of the text file to read

    Returns:
      list of station identifier strings, in file order
    """
    # The context manager closes the file even if reading fails part-way
    with open(filename) as fh:
        stripped = (line.strip() for line in fh)
        # Drop blank lines so no request is made for an empty station id
        return [station for station in stripped if station]
|
|
|
|
|
|
def get_stations_from_networks():
    """Build a station list by using a bunch of IEM networks.

    For every state abbreviation listed below, the ``<STATE>_ASOS`` network
    GeoJSON is fetched and the ``sid`` property of each feature is collected.

    Returns:
      list of station identifier strings, in network order
    """
    states = """AK AL AR AZ CA CO CT DE FL GA HI IA ID IL IN KS KY LA MA MD ME
    MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT
    WA WI WV WY"""
    networks = ["%s_ASOS" % (state,) for state in states.split()]

    stations = []
    for network in networks:
        # Get metadata
        uri = (
            "https://mesonet.agron.iastate.edu/geojson/network/%s.geojson"
        ) % (network,)
        # A timeout keeps one stalled request from hanging the whole run
        response = urlopen(uri, timeout=300)
        try:
            # Decode explicitly: json.load() on a byte stream is not
            # supported by every Python version this script targets.
            jdict = json.loads(response.read().decode("utf-8"))
        finally:
            response.close()
        stations.extend(site["properties"]["sid"] for site in jdict["features"])
    return stations
|
|
|
|
|
|
def download_alldata():
    """Fetch all available data, one request and one output file per step.

    The service supports up to 24 hours worth of data at a time, so the
    hard-coded period is walked in 24 hour windows.  Each window is written
    to ``YYYYMMDD.txt`` (named after the window start), relative to the
    current working directory.
    """
    # Period to request, timestamps in UTC
    period_start = datetime.datetime(2012, 8, 1)
    period_end = datetime.datetime(2012, 9, 1)
    step = datetime.timedelta(hours=24)

    base_url = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

    window_start = period_start
    while window_start < period_end:
        window_end = window_start + step
        request_url = "".join(
            (
                base_url,
                window_start.strftime("year1=%Y&month1=%m&day1=%d&"),
                window_end.strftime("year2=%Y&month2=%m&day2=%d&"),
            )
        )
        print("Downloading: %s" % (window_start,))
        payload = download_data(request_url)
        with open("%s.txt" % (window_start.strftime("%Y%m%d"),), "w") as fh:
            fh.write(payload)
        window_start = window_end
|
|
|
|
|
|
def main():
    """Our main method

    Requests the hard-coded period once per station and writes each station's
    data to ``<station>_<start>_<end>.txt``, where start and end are formatted
    as ``YYYYMMDDHHMM``.
    """
    # timestamps in UTC to request data for
    startts = datetime.datetime(2012, 8, 1)
    endts = datetime.datetime(2012, 9, 1)

    service = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

    service += startts.strftime("year1=%Y&month1=%m&day1=%d&")
    service += endts.strftime("year2=%Y&month2=%m&day2=%d&")

    # Two examples of how to specify a list of stations
    stations = get_stations_from_networks()
    # stations = get_stations_from_filelist("mystations.txt")
    for station in stations:
        # service already ends with "&"; adding another one would put an
        # empty parameter ("&&") into the query string.
        uri = "%sstation=%s" % (service, station)
        print("Downloading: %s" % (station,))
        data = download_data(uri)
        outfn = "%s_%s_%s.txt" % (
            station,
            startts.strftime("%Y%m%d%H%M"),
            endts.strftime("%Y%m%d%H%M"),
        )
        # Context manager closes the file even if the write fails
        with open(outfn, "w") as out:
            out.write(data)
|
|
|
|
|
|
if __name__ == "__main__":
    # Default entry point: fetch all available data in 24 hour steps
    download_alldata()
    # Alternative entry point: one request and output file per station
    # main()
|