How to get all available stations and the .eml files (station information including station code, latitude, longitude, start time and end time) from IRIS, and use these available stations to request timeseries data?
1. You can check the data summary page here: http://ds.iris.edu/SeismiQuery/summaries.htm to find all available networks and their respective time spans.
2. Then go to the request page: http://ds.iris.edu/SeismiQuery/station.htm to fill in the fields and send the request.
3. However, the problem is that there are many networks (including networks with the same name but in different areas), for example the XC network. Requesting all of these stations manually is cumbersome.
A web crawler can help you manage this problem, including filtering out the locations that you are not interested in.
1. Get_sta_info.py
↓
# -*- coding: utf-8 -*-
#---------THIS SCRIPT OUTPUT Valid_Net.dat for get_eml.py-----------------------
"""
Created on Tue Nov 8 20:10:55 2016
@author: TimLin
"""
import requests
from bs4 import BeautifulSoup
import os
def chk_sta(yyyy, net):
    """Check whether network `net` has BHZ/HHZ channel data in year `yyyy`.

    Scrapes the IRIS SeismiQuery per-network data-availability page for
    that year, follows the first station link, and scans the channel
    column of the resulting table.

    Parameters
    ----------
    yyyy : int or str
        Year to check.
    net : str
        Network code (e.g. 'II', 'XC').

    Returns
    -------
    bool
        True if a BHZ or HHZ channel is listed; False if the page does
        not exist, has too few links, has no recognizable CHANNEL
        column, or lists no vertical broadband channel.
    """
    url = ('http://ds.iris.edu/SeismiQuery/data_avail/by_network/'
           + str(yyyy) + '/' + net + '_data_avail_' + str(yyyy) + '.html')
    # example: http://ds.iris.edu/SeismiQuery/data_avail/by_network/1999/II_data_avail_1999.html
    data = requests.get(url)
    if not data.ok:
        # no availability page for this network/year
        return False
    print(' access ok check request...')
    soup = BeautifulSoup(data.text, 'lxml')
    A = soup.findAll('a')
    if len(A) < 5:
        return False
    # follow the first data link on the availability page
    url2 = 'http://ds.iris.edu' + A[1].attrs['href']
    data2 = requests.get(url2)
    A2 = BeautifulSoup(data2.text, 'lxml')
    title = A2.findAll('th')
    # locate which table column holds the channel code
    chn_col = 'Unknown'
    for n, tmpcol in enumerate(title):
        if tmpcol.text.strip() == 'CHANNEL':
            chn_col = n
            break
    # BUG FIX: original compared against the misspelled sentinel 'Unlnow',
    # so this branch never fired; it also returned the truthy string
    # 'Unknow column', which callers using `if chk_sta(...)` would have
    # treated as success.  Without a CHANNEL column we cannot verify
    # BHZ/HHZ, so report no data.
    if chn_col == 'Unknown':
        return False
    rem = len(title)  # number of columns per table row
    AA2 = A2.findAll('td')
    for i in range(len(AA2)):
        # cells appear row-major; pick out only the CHANNEL column
        if i % rem == chn_col:
            if AA2[i].text.strip() in ['BHZ', 'HHZ']:
                return True
    return False
# ---- collect every network code listed on the IRIS network index page ----
url = 'http://ds.iris.edu/SeismiQuery/data_avail/by_network/network_list.html'
allinfo = requests.get(url)
soup = BeautifulSoup(allinfo.text, 'lxml')
tmpnet = soup.findAll('a')
allnet = []
for i in range(len(tmpnet)):
    if i == 0:
        continue  # first anchor is a navigation link, not a network code
    allnet.append(tmpnet[i].text)
allnet = list(set(allnet))  # de-duplicate network codes

# ---- output directory for the .eml files produced later by get_eml.py ----
if not os.path.isdir('Valid_Net'):
    os.mkdir('Valid_Net')

# ---- query each network's data summary and record valid networks ----
url = 'http://ds.iris.edu/cgi-bin/seismiquery/bin/summaries.pl'
# net='XD'
# BUG FIX: the original opened doneNET.log for reading unconditionally,
# crashing with FileNotFoundError on the first run; create it when absent
# (same idiom get_eml.py already uses for eml.log).
if not os.path.exists('doneNET.log'):
    open('doneNET.log', 'w').close()
OUT1 = open('doneNET.log', 'r')
line = OUT1.read()  # every network processed by a previous run
OUT1.close()
# allnet=['AC', 'SN', 'MC', 'PB', 'OO', 'NM', 'IN', '1B', 'ON', 'YE'] <- for testing
for n_allnet in allnet:
    if n_allnet in line:
        continue  # already done in a previous run
    # BUG FIX: the original opened these before the skip check, leaking two
    # file handles for every already-processed network.
    OUT1 = open('doneNET.log', 'a')
    OUT2 = open('Valid_Net.dat', 'a')
    OUT1.write("%s\n" % n_allnet)
    print('start:' + n_allnet)
    net = n_allnet
    # set payload: SeismiQuery form fields plus the SQL the CGI expects
    payload = ({
        'checkbox5': 'checkbox',
        'cnet': 'net',
        'net': net,
        'sta': '',
        'loc': '',
        'cha': '',
        'quality': 'ALL',
        'email': 'none',
        'select': 'none',
        'where': 'none',
        'sqlcmd': "select a.network,min(earliest) as EARLIEST, max(latest) as LATEST, restricted,nickname as NET_ABBR, name from channel_quality_summary a, networks n where a.network = n.network and to_char(a.earliest,'YYYY') between n.startyr and n.endyr and a.network like '%s' group by a.network, nickname,name,restricted order by a.network,nickname,EARLIEST" % net,
        'count': 'none',
        'null': 'null',
    })
    tmp = requests.post(url, data=payload)
    soup2 = BeautifulSoup(tmp.text, 'lxml')
    alltbl = soup2.findAll('td')
    # the result table has 7 columns; split the flat cell list by column
    network = []
    sttime = []
    edtime = []
    rest = []
    netattr = []
    name = []
    for j in range(len(alltbl)):
        cell = alltbl[j].text.strip()
        col = j % 7
        if col == 0:
            network.append(cell)
        elif col == 1:
            sttime.append(cell)
        elif col == 2:
            edtime.append(cell)
        elif col == 3:
            rest.append(cell)
        elif col == 4:
            netattr.append(cell)
        elif col == 5:
            name.append(cell)
        # column 6 (last) is ignored
    flag = []
    for nNet, tmp in enumerate(sttime):
        print(' check NET(' + str(nNet + 1) + '/' + str(len(network)) + '):')
        tmpnet = network[nNet]
        sty = sttime[nNet].split('-')[0]  # start year
        edy = edtime[nNet].split('-')[0]  # end year
        yyyy = range(int(sty), int(edy) + 1)
        yyyy = yyyy[-1::-1]  # newest year first: recent data is most likely present
        flag.append(0)
        n = 0
        for tmpy in yyyy:
            print(' Year:' + str(tmpy))
            if chk_sta(tmpy, tmpnet):
                flag[-1] = 1
                OUT2.write('%s\n' % (tmpnet + '_' + sty + '_' + edy + '_' + netattr[nNet]))
                break
            n = n + 1
            if n > 3:
                break  # give up after 4 consecutive years with no BHZ/HHZ
        if flag[-1] == 0:
            print(' Not Found!')
    OUT1.close()
    OUT2.close()
2. get_eml.py
↓
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 9 15:17:20 2016
@author: tim
"""
import requests
from bs4 import BeautifulSoup
import time
import os
# For each network recorded in Valid_Net.dat by Get_sta_info.py, request the
# station listing from IRIS SeismiQuery and save it as a .eml text table in
# ./Valid_Net/.  Progress is checkpointed in eml.log so the script can resume.
url = 'http://ds.iris.edu/cgi-bin/seismiquery/bin/station.pl'
# create the progress log on first run so the read below cannot fail
if not os.path.exists('eml.log'):
    TMP = open('eml.log', 'w')
    TMP.close()
LOG = open('eml.log', 'r')
oldline = LOG.read()  # networks already downloaded by a previous run
LOG.close()
# read all net records produced by Get_sta_info.py (NET_startyr_endyr_abbr)
IN1 = open('Valid_Net.dat', 'r')
for line in IN1.readlines():
    if line in oldline:
        continue  # already downloaded
    print('Start:%s' % line.strip())
    Net = line.split('_')[0]
    sttime = '01/01/' + line.split('_')[1]
    edtime = '12/31/' + line.split('_')[2]
    # SeismiQuery station form fields plus the SQL the CGI expects
    payload = ({
        'checkbox': 'checkbox',
        'clat': 'latitude',
        'clon': 'longitude',
        'lat2': '',
        'lon1': '',
        'lon2': '',
        'lat1': '',
        'vnet': '',
        'cnet': 'network',
        'network': Net,
        'csta': 'station',
        'station': '',
        'elev1': '',
        'elev2': '',
        'cstm': 'starttime',
        'startdate': sttime,
        'cetm': 'endtime',
        'enddate': edtime,
        'csite': 'site',
        'site': '',
        'netaff': '',
        'null': 'null',
        'select': "select distinct station.network,station.station,station.starttime,station.endtime,to_char(station.latitude,'S00.999999') as LAT,to_char(station.longitude,'S000.999999') as LON,station.site",
        'parms': "station.starttime <= to_date('%s','MM/DD/YYYY') and station.endtime > to_date('%s','MM/DD/YYYY')" % (edtime, sttime),
        'starttime': sttime,
        'endtime': edtime,
        'email': 'none',
        'count': '0',
    })
    try:
        data = requests.post(url, data=payload)
        soup = BeautifulSoup(data.text, 'lxml')
        tabD = soup.findAll('td')  # data cells
        tabH = soup.findAll('th')  # header cells (one per column)
        OUT1 = open('./Valid_Net/' + Net + '-' + line.split('_')[1] + '-' + line.split('_')[2] + '.eml', 'w')
        # header row
        for nH in range(len(tabH)):
            OUT1.write('%s ' % (tabH[nH].text.strip()))
        # data rows: start a new line at every first-column cell
        for i in range(len(tabD)):
            if i % len(tabH) == 0:
                OUT1.write('\n')
                OUT1.write('%s ' % (tabD[i].text.strip()))
            else:
                try:
                    OUT1.write('%s ' % (tabD[i].text.strip()))
                except Exception:
                    # best-effort: substitute a placeholder for unwritable cells
                    OUT1.write('- ')
        OUT1.close()
        # checkpoint only after a successful download
        LOG = open('eml.log', 'a')
        LOG.write('%s' % (line))
        LOG.close()
        time.sleep(1)  # be polite to the IRIS server
    # FIX: narrowed from a bare `except:` which also swallowed
    # KeyboardInterrupt/SystemExit; failed networks are logged for retry.
    except Exception:
        OUT2 = open('error.log', 'a')
        OUT2.write('%s' % (line))
        OUT2.close()
        time.sleep(1)
IN1.close()
And the results are automatically saved in the directory Valid_Net
↓