How to get all available stations and the .eml files (station information including station code, latitude, longitude, start time and end time) from IRIS, and use these available stations to request timeseries data?
1. You can check the data summary page here: http://ds.iris.edu/SeismiQuery/summaries.htm to find all available networks and their respective time spans.
2. Then go to the request page: http://ds.iris.edu/SeismiQuery/station.htm to fill in the fields and send the request.
3. However, the problem is that there are many networks (including networks with the same name but in different areas), for example the XC network. Requesting all of these stations manually is cumbersome.
A web crawler can help you manage this problem, including filtering out the locations that you are not interested in.
1. Get_sta_info.py
↓
# -*- coding: utf-8 -*-
#---------THIS SCRIPT OUTPUT Valid_Net.dat for get_eml.py-----------------------
"""
Created on Tue Nov 8 20:10:55 2016
@author: TimLin
"""
import requests
from bs4 import BeautifulSoup
import os
def chk_sta(yyyy, net):
    """Check whether network `net` has BHZ/HHZ channel data in year `yyyy`.

    Scrapes the IRIS SeismiQuery per-network data-availability page for
    that year, follows the first station link, and scans the channel
    column of the resulting table.

    Parameters
    ----------
    yyyy : int or str
        Year to check.
    net : str
        Network code (e.g. 'II', 'XC').

    Returns
    -------
    bool
        True if a BHZ or HHZ channel is listed; False if the page does
        not exist, has too few links, has no recognizable CHANNEL
        column, or lists no vertical broadband channel.
    """
    url = ('http://ds.iris.edu/SeismiQuery/data_avail/by_network/'
           + str(yyyy) + '/' + net + '_data_avail_' + str(yyyy) + '.html')
    # example: http://ds.iris.edu/SeismiQuery/data_avail/by_network/1999/II_data_avail_1999.html
    data = requests.get(url)
    if not data.ok:
        # no availability page for this network/year
        return False
    print(' access ok check request...')
    soup = BeautifulSoup(data.text, 'lxml')
    A = soup.findAll('a')
    if len(A) < 5:
        return False
    # follow the first data link on the availability page
    url2 = 'http://ds.iris.edu' + A[1].attrs['href']
    data2 = requests.get(url2)
    A2 = BeautifulSoup(data2.text, 'lxml')
    title = A2.findAll('th')
    # locate which table column holds the channel code
    chn_col = 'Unknown'
    for n, tmpcol in enumerate(title):
        if tmpcol.text.strip() == 'CHANNEL':
            chn_col = n
            break
    # BUG FIX: original compared against the misspelled sentinel 'Unlnow',
    # so this branch never fired; it also returned the truthy string
    # 'Unknow column', which callers using `if chk_sta(...)` would have
    # treated as success.  Without a CHANNEL column we cannot verify
    # BHZ/HHZ, so report no data.
    if chn_col == 'Unknown':
        return False
    rem = len(title)  # number of columns per table row
    AA2 = A2.findAll('td')
    for i in range(len(AA2)):
        # cells appear row-major; pick out only the CHANNEL column
        if i % rem == chn_col:
            if AA2[i].text.strip() in ['BHZ', 'HHZ']:
                return True
    return False
# ---- collect every network code listed on the IRIS network index page ----
url = 'http://ds.iris.edu/SeismiQuery/data_avail/by_network/network_list.html'
allinfo = requests.get(url)
soup = BeautifulSoup(allinfo.text, 'lxml')
tmpnet = soup.findAll('a')
allnet = []
for i in range(len(tmpnet)):
    if i == 0:
        continue  # first anchor is a navigation link, not a network code
    allnet.append(tmpnet[i].text)
allnet = list(set(allnet))  # de-duplicate network codes

# ---- output directory for the .eml files produced later by get_eml.py ----
if not os.path.isdir('Valid_Net'):
    os.mkdir('Valid_Net')

# ---- query each network's data summary and record valid networks ----
url = 'http://ds.iris.edu/cgi-bin/seismiquery/bin/summaries.pl'
# net='XD'
# BUG FIX: the original opened doneNET.log for reading unconditionally,
# crashing with FileNotFoundError on the first run; create it when absent
# (same idiom get_eml.py already uses for eml.log).
if not os.path.exists('doneNET.log'):
    open('doneNET.log', 'w').close()
OUT1 = open('doneNET.log', 'r')
line = OUT1.read()  # every network processed by a previous run
OUT1.close()
# allnet=['AC', 'SN', 'MC', 'PB', 'OO', 'NM', 'IN', '1B', 'ON', 'YE'] <- for testing
for n_allnet in allnet:
    if n_allnet in line:
        continue  # already done in a previous run
    # BUG FIX: the original opened these before the skip check, leaking two
    # file handles for every already-processed network.
    OUT1 = open('doneNET.log', 'a')
    OUT2 = open('Valid_Net.dat', 'a')
    OUT1.write("%s\n" % n_allnet)
    print('start:' + n_allnet)
    net = n_allnet
    # set payload: SeismiQuery form fields plus the SQL the CGI expects
    payload = ({
        'checkbox5': 'checkbox',
        'cnet': 'net',
        'net': net,
        'sta': '',
        'loc': '',
        'cha': '',
        'quality': 'ALL',
        'email': 'none',
        'select': 'none',
        'where': 'none',
        'sqlcmd': "select a.network,min(earliest) as EARLIEST, max(latest) as LATEST, restricted,nickname as NET_ABBR, name from channel_quality_summary a, networks n where a.network = n.network and to_char(a.earliest,'YYYY') between n.startyr and n.endyr and a.network like '%s' group by a.network, nickname,name,restricted order by a.network,nickname,EARLIEST" % net,
        'count': 'none',
        'null': 'null',
    })
    tmp = requests.post(url, data=payload)
    soup2 = BeautifulSoup(tmp.text, 'lxml')
    alltbl = soup2.findAll('td')
    # the result table has 7 columns; split the flat cell list by column
    network = []
    sttime = []
    edtime = []
    rest = []
    netattr = []
    name = []
    for j in range(len(alltbl)):
        cell = alltbl[j].text.strip()
        col = j % 7
        if col == 0:
            network.append(cell)
        elif col == 1:
            sttime.append(cell)
        elif col == 2:
            edtime.append(cell)
        elif col == 3:
            rest.append(cell)
        elif col == 4:
            netattr.append(cell)
        elif col == 5:
            name.append(cell)
        # column 6 (last) is ignored
    flag = []
    for nNet, tmp in enumerate(sttime):
        print(' check NET(' + str(nNet + 1) + '/' + str(len(network)) + '):')
        tmpnet = network[nNet]
        sty = sttime[nNet].split('-')[0]  # start year
        edy = edtime[nNet].split('-')[0]  # end year
        yyyy = range(int(sty), int(edy) + 1)
        yyyy = yyyy[-1::-1]  # newest year first: recent data is most likely present
        flag.append(0)
        n = 0
        for tmpy in yyyy:
            print(' Year:' + str(tmpy))
            if chk_sta(tmpy, tmpnet):
                flag[-1] = 1
                OUT2.write('%s\n' % (tmpnet + '_' + sty + '_' + edy + '_' + netattr[nNet]))
                break
            n = n + 1
            if n > 3:
                break  # give up after 4 consecutive years with no BHZ/HHZ
        if flag[-1] == 0:
            print(' Not Found!')
    OUT1.close()
    OUT2.close()
2. get_eml.py
↓
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 9 15:17:20 2016
@author: tim
"""
import requests
from bs4 import BeautifulSoup
import time
import os
# For each network recorded in Valid_Net.dat by Get_sta_info.py, request the
# station listing from IRIS SeismiQuery and save it as a .eml text table in
# ./Valid_Net/.  Progress is checkpointed in eml.log so the script can resume.
url = 'http://ds.iris.edu/cgi-bin/seismiquery/bin/station.pl'
# create the progress log on first run so the read below cannot fail
if not os.path.exists('eml.log'):
    TMP = open('eml.log', 'w')
    TMP.close()
LOG = open('eml.log', 'r')
oldline = LOG.read()  # networks already downloaded by a previous run
LOG.close()
# read all net records produced by Get_sta_info.py (NET_startyr_endyr_abbr)
IN1 = open('Valid_Net.dat', 'r')
for line in IN1.readlines():
    if line in oldline:
        continue  # already downloaded
    print('Start:%s' % line.strip())
    Net = line.split('_')[0]
    sttime = '01/01/' + line.split('_')[1]
    edtime = '12/31/' + line.split('_')[2]
    # SeismiQuery station form fields plus the SQL the CGI expects
    payload = ({
        'checkbox': 'checkbox',
        'clat': 'latitude',
        'clon': 'longitude',
        'lat2': '',
        'lon1': '',
        'lon2': '',
        'lat1': '',
        'vnet': '',
        'cnet': 'network',
        'network': Net,
        'csta': 'station',
        'station': '',
        'elev1': '',
        'elev2': '',
        'cstm': 'starttime',
        'startdate': sttime,
        'cetm': 'endtime',
        'enddate': edtime,
        'csite': 'site',
        'site': '',
        'netaff': '',
        'null': 'null',
        'select': "select distinct station.network,station.station,station.starttime,station.endtime,to_char(station.latitude,'S00.999999') as LAT,to_char(station.longitude,'S000.999999') as LON,station.site",
        'parms': "station.starttime <= to_date('%s','MM/DD/YYYY') and station.endtime > to_date('%s','MM/DD/YYYY')" % (edtime, sttime),
        'starttime': sttime,
        'endtime': edtime,
        'email': 'none',
        'count': '0',
    })
    try:
        data = requests.post(url, data=payload)
        soup = BeautifulSoup(data.text, 'lxml')
        tabD = soup.findAll('td')  # data cells
        tabH = soup.findAll('th')  # header cells (one per column)
        OUT1 = open('./Valid_Net/' + Net + '-' + line.split('_')[1] + '-' + line.split('_')[2] + '.eml', 'w')
        # header row
        for nH in range(len(tabH)):
            OUT1.write('%s ' % (tabH[nH].text.strip()))
        # data rows: start a new line at every first-column cell
        for i in range(len(tabD)):
            if i % len(tabH) == 0:
                OUT1.write('\n')
                OUT1.write('%s ' % (tabD[i].text.strip()))
            else:
                try:
                    OUT1.write('%s ' % (tabD[i].text.strip()))
                except Exception:
                    # best-effort: substitute a placeholder for unwritable cells
                    OUT1.write('- ')
        OUT1.close()
        # checkpoint only after a successful download
        LOG = open('eml.log', 'a')
        LOG.write('%s' % (line))
        LOG.close()
        time.sleep(1)  # be polite to the IRIS server
    # FIX: narrowed from a bare `except:` which also swallowed
    # KeyboardInterrupt/SystemExit; failed networks are logged for retry.
    except Exception:
        OUT2 = open('error.log', 'a')
        OUT2.write('%s' % (line))
        OUT2.close()
        time.sleep(1)
IN1.close()
And the results are automatically saved in the directory Valid_Net
↓