Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

The outline of this page is :

1) Problem description

2) Program flow

3) Test data file and caveats


The predefined data set covers the dates 2019-10-15 to 2019-10-17.

1) Description


The WIGOS id contains four parts such as 0-2XXXX-0-YYYYY, 

...

old stations and their  WIGOS ids.


2)Program description

Code Block
languagepy
'''
Created on 22 Oct 2019


# Copyright 2005-2018 ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction
   
This is a test program to encode Wigos Synop
requires
   
1) ecCodes version 2.14.81 or above (available at https://confluence.ecmwf.int/display/ECC/Releases)
2) python3.6.8-01
   
To run the program
   
./addWigosProg.py -i <input bufr> -m <mode [web|json]> -l <logFile> -o <output BUFR file>
   
      
Uses BUFR version 4 template  and adds the WIGOS Identifier 301150
REQUIRES masterTablesVersionNumber 28 or above (lower values are raised to 28 in the output)
   
Author : Roberto Ribas Garcia ECMWF 28/10/2019

'''Modifications
from  eccodes import *
importperformance argparseimprovement 
import( jsonuses 
importskipExtraKeyAttributes) re 
import pandas as pd 
import numpy as np 
import logging 
import requests 
import os 

def read_cmd_line():
    '''
    builds the command line parser and returns the parsed arguments

    the returned namespace has the attributes input, output, mode and logfile
    none of the options is mandatory, the ones not given are None
    mode only accepts the values web or json
    '''
    parser=argparse.ArgumentParser()
    # ( flags , keyword arguments ) for every supported option
    options=(
        (("-i","--input"),{"help":"input bufr file"}),
        (("-o","--output"),{"help":"output bufr file with wigos"}),
        (("-m","--mode"),{"choices":["web","json"],"help":" wigos source [ json file or web ]"}),
        (("-l","--logfile"),{"help":"log file "}),
    )
    for flags,settings in options:
        parser.add_argument(*flags,**settings)
    return parser.parse_args()
    
def read_oscar_json(jsonFile):
    '''
    reads the OSCAR station list from the JSON file jsonFile

    jsonFile is the path of the file to read
    returns the decoded JSON content ( whatever json.load produces )
    '''
    # JSON text is UTF-8, do not depend on the platform default encoding
    with open(jsonFile,"r",encoding="utf-8") as f:
        return json.load(f)

def read_oscar_web(oscarURL="https://oscar.wmo.int/surface/rest/api/search/station?",timeout=60):
    '''
    downloads the station list from the OSCAR web service

    oscarURL is the address queried with a HTTP GET
    timeout is the number of seconds to wait for the server, without it
    a server that does not answer would block the program forever

    returns the decoded JSON body of the answer
    raises requests.HTTPError when the server answers with an error status
    '''
    r=requests.get(oscarURL,timeout=timeout)
    # stop here on 4xx/5xx instead of handing an error page to the JSON parser
    r.raise_for_status()
    return r.json()

def parse_json_into_dataframe(jtext):
    '''
    parses the JSON from the file wigosJsonFile
    filters the stations by wigosStationIdentifiers key in the dictionaries
    '''
and codes_clone   04/11/2019
    changes for SYNOP and TEMP messages                                       05/11/2019
    fixed codes_clone issue                                                   05/11/2019

'''
from eccodes import *
import argparse 
import json 
import re 
import pandas as pd 
import numpy as np 
import logging 
import requests 
import os 

def read_cmd_line():
    '''
    builds the command line parser and returns the parsed arguments

    the returned namespace has the attributes input, output, mode and logfile
    none of the options is mandatory, the ones not given are None
    mode only accepts the values web or json
    '''
    parser=argparse.ArgumentParser()
    # ( flags , keyword arguments ) for every supported option
    options=(
        (("-i","--input"),{"help":"input bufr file"}),
        (("-o","--output"),{"help":"output bufr file with wigos"}),
        (("-m","--mode"),{"choices":["web","json"],"help":" wigos source [ json file or web ]"}),
        (("-l","--logfile"),{"help":"log file "}),
    )
    for flags,settings in options:
        parser.add_argument(*flags,**settings)
    return parser.parse_args()
    
def read_oscar_json(jsonFile):
    '''
    reads the OSCAR station list from the JSON file jsonFile

    jsonFile is the path of the file to read
    returns the decoded JSON content ( whatever json.load produces )
    '''
    # JSON text is UTF-8, do not depend on the platform default encoding
    with open(jsonFile,"r",encoding="utf-8") as f:
        return json.load(f)

def read_oscar_web(oscarURL="https://oscar.wmo.int/surface/rest/api/search/station?",timeout=60):
    '''
    downloads the station list from the OSCAR web service

    oscarURL is the address queried with a HTTP GET
    timeout is the number of seconds to wait for the server, without it
    a server that does not answer would block the program forever

    returns the decoded JSON body of the answer
    raises requests.HTTPError when the server answers with an error status
    '''
    r=requests.get(oscarURL,timeout=timeout)
    # stop here on 4xx/5xx instead of handing an error page to the JSON parser
    r.raise_for_status()
    return r.json()

def parse_json_into_dataframe(jtext):
    '''
    builds a pandas DataFrame with the surface WIGOS stations found in jtext

    jtext is the list of station dictionaries decoded from the OSCAR JSON

    only the stations that carry a wigosStationIdentifiers key are used and,
    among their identifiers, only the primary ones of the form
    0-20XXX-0-YYYYY (surface)
    the identifier is split on "-" into its four parts and the last five
    characters of the fourth part are stored as oldID

    returns a DataFrame with the columns longitude, latitude, name,
    wigosStationIdentifiers, wigosIdentifierSeries, wigosIssuerOfIdentifier,
    wigosIssueNumber, wigosLocalIdentifierCharacter and oldID
    when no station matches the frame is empty but still has these columns,
    a key missing from a station dictionary shows up as NaN
    the dictionaries in jtext are left untouched
    '''
    columns=["longitude","latitude","name","wigosStationIdentifiers","wigosIdentifierSeries",
             "wigosIssuerOfIdentifier","wigosIssueNumber","wigosLocalIdentifierCharacter","oldID"]

    # raw string, so that \d is handed to the regex engine instead of being
    # read as an ( invalid ) string escape
    p=re.compile(r"0-20\d{3}-0-\d{5}")

    fwigosStations=[]
    for d in jtext:
        # stations without the key contribute nothing
        for e in d.get("wigosStationIdentifiers") or []:
            if e.get("primary") is not True:
                continue
            wigosId=e["wigosStationIdentifier"]
            if not p.match(wigosId):
                continue
            wigosParts=wigosId.split("-")
            # work on a copy, the caller keeps its station dictionary unchanged
            station=dict(d)
            station["wigosIdentifierSeries"]=wigosParts[0]
            station["wigosIssuerOfIdentifier"]=wigosParts[1]
            station["wigosIssueNumber"]=wigosParts[2]
            station["wigosLocalIdentifierCharacter"]=wigosParts[3]
            station["oldID"]=wigosParts[3][-5:]
            fwigosStations.append(station)

    # passing the columns to the constructor keeps them even for an empty list
    # ( selecting them afterwards raises KeyError on an empty frame )
    return pd.DataFrame(fwigosStations,columns=columns)

def get_ident(bid):
        else:'''
    gets the ident of the message by  nowigosStations.append(d)
    
    '''combining blockNumber and stationNumber keys from the input BUFR file
    usesthe onlyident themay wigos 0-20XXX-0-YYYYY (surface)
    '''
    p=re.compile("0-20\d{3}-0-\d{5}")
be single valued or multivalued ( only single valued are considered further)
    fwigosStations=[]
    '''
  for d inident=None wigosStations:
    if ( codes_is_defined(bid, "blockNumber")  wigosInfo=d["wigosStationIdentifiers"]and codes_is_defined(bid,"stationNumber") ):
        for e in wigosInfo:
blockNumber=codes_get_array(bid,"blockNumber")
        stationNumber=codes_get_array(bid,"stationNumber")
        if e["primary"]==True len(blockNumber)==1 and len(stationNumber)==1:
                wigosId=e["wigosStationIdentifier"]ident="{0:02d}{1:03d}".format(int(blockNumber),int(stationNumber))
        elif len(blockNumber)==1 and len(stationNumber)!=1:
     if p.match(wigosId):
      blockNumber=np.repeat(blockNumber,len(stationNumber))
              wigosParts=wigosId.split("-")
                    d["wigosIdentifierSeries"]=wigosParts[0]ident=[str("{0:02d}{1:03d}".format(b,s)) for b,s in zip(blockNumber,stationNumber) 
                    d["wigosIssuerOfIdentifier"]=wigosParts[1]
      if b!=CODES_MISSING_LONG and s!=CODES_MISSING_LONG] 
        elif len(blockNumber)!=1     d["wigosIssueNumber"]=wigosParts[2]and len(stationNumber)!=1:
                    d["wigosLocalIdentifierCharacter"]=wigosParts[3]
       ident=[str("{0:02d}{1:03d}".format(b,s)) for b,s in zip(blockNumber,stationNumber) 
             d["oldID"]=wigosParts[3][-5:]
      if b!=CODES_MISSING_LONG and s!=CODES_MISSING_LONG]
        '''
   fwigosStations.append(d)
     here only the first element   of the list is returned to the main program
    df=pd.DataFrame(fwigosStations)
    df=df[["longitude","latitude","name","wigosStationIdentifiers","wigosIdentifierSeries","wigosIssuerOfIdentifier","wigosIssueNumber",
           "wigosLocalIdentifierCharacter","oldID"]]  
    return df

def get_ident(bid):
this avoids lists being used in the dataframe query and breaking the logic
        '''
     gets the ident of the message by combining blockNumber and stationNumber keys from the input BUFR fileif isinstance(ident,list):
            ident=ident[0]
    thereturn ident may be single valued or multivalued ( only single valued are considered further)


    

def add_wigos_info(ident,bid,odf,obid):
    '''
    add the wigos information ident=None to the message ident pointed by bid
    ifthe ( codes_is_defined(bid, "blockNumber") and codes_is_defined(bid,"stationNumber") ):odf contains the WIGOS information for ident 
    obid is the  blockNumber=codes_get_array(bid,"blockNumber")output handle
     '''
   stationNumber=codes_get_array(bid,"stationNumber")
    
    if len(blockNumber)==1 and len(stationNumber)==1codes_is_defined(bid, "shortDelayedDescriptorReplicationFactor"):
            ident="{0:02d}{1:03d}".format(int(blockNumber),int(stationNumber))shortDelayed=codes_get_array(bid,"shortDelayedDescriptorReplicationFactor")
    else:
     elif len(blockNumber)==1 and len(stationNumber)!=1:
       shortDelayed=None 

     blockNumber=np.repeat(blockNumber,len(stationNumber))if codes_is_defined(bid, "delayedDescriptorReplicationFactor"):
            ident=[str("{0:02d}{1:03d}".format(b,s)) for b,s in zip(blockNumber,stationNumber) 
delayedDesc=codes_get_array(bid,"delayedDescriptorReplicationFactor")
    else:
        delayedDesc=None 
        
    if b!=CODES_MISSING_LONG and s!=CODES_MISSING_LONG] 
   codes_is_defined(bid, "extendedDelayedDescriptorReplicationFactor"):
        extDelayedDesc=codes_get_array(bid,"extendedDelayedDescriptorReplicationFactor")
    else:
     elif len(blockNumber)!=1 and len(stationNumber)!=1:extDelayedDesc=None 

        
    ident=[str("{0:02d}{1:03d}".format(b,s)) for b,s in zip(blockNumber,stationNumber) nsubsets=codes_get(bid,"numberOfSubsets")
    compressed=codes_get(bid,"compressedData")
    
           masterTablesVersionNumber=codes_get(bid,"masterTablesVersionNumber")
    if b!=CODES_MISSING_LONG and s!=CODES_MISSING_LONG]
masterTablesVersionNumber<28:
        masterTablesVersionNumber=28
      
  
  return ident 

def add_wigos_info(ident,bid,wdf,obid): unexpandedDescriptors=codes_get_array(bid,"unexpandedDescriptors")
    '''
    add the wigos information to the message ident pointed by bidoutUD=list(unexpandedDescriptors)
    outUD.insert(0,301150)
        
    '''
    theonly wdftreat is the wholeuncompressed wigosmessages dataframewith and1 obid is the output bidsubset 
    '''
for future add treatment 
of compressed messages with 
more than   if codes_is_defined(bid, "shortDelayedDescriptorReplicationFactor"):1 subset
    '''
    shortDelayed=codes_get_array(bid,"shortDelayedDescriptorReplicationFactor")
    if compressed==0 and elsensubsets==1:
        shortDelayed=None 

'''
    if codes_is_defined(bid, "delayedDescriptorReplicationFactor"):
  IMPORTANT, takes into account delayed replications delayedDesc=codes_get_array(bid,"delayedDescriptorReplicationFactor")
 ( all possible cases) to accommodate
   else:
     SYNOP + TEMP delayedDesc=Nonemessages 
        '''
       
 if shortDelayed is  nsubsets=codes_get(bid,"numberOfSubsets")not None:
    compressed=codes_get(bid,"compressedData")
    
    masterTablesVersionNumber=codes_set_getarray(bidobid,"masterTablesVersionNumberinputShortDelayedDescriptorReplicationFactor",shortDelayed)
    if masterTablesVersionNumber<28:
    if delayedDesc is not masterTablesVersionNumber=28None:
        
    unexpandedDescriptors=codes_getset_array(bidobid,"unexpandedDescriptors")
    outUD=list(unexpandedDescriptors)
    outUD.insert(0,301150inputDelayedDescriptorReplicationFactor",delayedDesc)
        
if extDelayedDesc is not '''None:
    only treat the uncompressed messages with 1 subset 
    for future add treatment of compressed messages with more than 1 subsetcodes_set_array(obid,"inputExtendedDelayedDescriptorReplicationFactor",extDelayedDesc)
            

    '''
    codes_set(obid,"masterTablesVersionNumber",masterTablesVersionNumber)
    if  compressed==0 and nsubsets==1: codes_set(obid,"numberOfSubsets",nsubsets)
        if
 shortDelayed is not None:
    
        codes_set_array(obid, "inputShortDelayedDescriptorReplicationFactorunexpandedDescriptors",shortDelayedoutUD)
        if delayedDesc is not None:
wis=odf["wigosIdentifierSeries"].values 
        if len(wis)!=1:
      codes_set_array(obid,"inputDelayedDescriptorReplicationFactor",delayedDesc)
        codes_set(obid,"masterTablesVersionNumber",masterTablesVersionNumber)wis=wis[0]
        codes_set(obid,"numberOfSubsetswigosIdentifierSeries",nsubsetsint(wis))
        odf=wdf.query("oldID=='{0}'".format(ident))wid=odf["wigosIssuerOfIdentifier"].values 
        if not odf.emptylen(wid)!=1:
            wid=wid[0]
        codes_set_array(obid, "unexpandedDescriptorswigosIssuerOfIdentifier",outUDint(wid))
            wiswin=odf["wigosIdentifierSerieswigosIssueNumber"].values 
            if len(wiswin)!=1:
                wis=wiswin=win[0]
            codes_set(obid,"wigosIdentifierSerieswigosIssueNumber",int(wiswin))
            wid=odf["wigosIssuerOfIdentifier"].values 
            if len(wid)!=1:
      wlid=odf["wigosLocalIdentifierCharacter"].values 
          wid=widwlid="{0:5}".format(wlid[0])
        logging.info(" wlid   codes_set(obid,"wigosIssuerOfIdentifier",int(widhere {0}".format(wlid))
            win=odf["wigosIssueNumber"].values 
 codes_set(obid,"wigosLocalIdentifierCharacter",str(wlid))
        codes_bufr_copy_data(bid,obid)
   if len(win)!=1 else:
        logging.info(" skipping compressed  message id {0}  win=win[0]with {1} subsets ".format(ident,nsubsets))
    
    return 
   codes_set(obid,"wigosIssueNumber",int(win))  
     

def main():
    print("ecCodes version {0}".format(codes_get_api_version()))
    args=read_cmd_line()
    logfile=args.logfile 
   wlid=odf["wigosLocalIdentifierCharacter"].values 
 logging.basicConfig(filename=logfile,level=logging.INFO,filemode="w")
    
    infile=args.input 
    wlid="{0:5}".format(wlid[0])
    outfile=args.output 
   
    logging.info(" wlid here {0}".format(wlid))
   mode=args.mode 
    if mode=="web":
         codes_set(obid,"wigosLocalIdentifierCharacter",str(wlid))jtext=read_oscar_web()
        cdirectory=os.getcwd()
        codes_bufr_copy_data(bid,obidoscarFile=os.path.join(cdirectory,"oscar.json")
        else:
       with open(oscarFile,"w") as f:
     logging.info(" wigos {0} is empty for ident {1}".format(ident,odf["wigosLocalIdentifierCharacter"].values)json.dump(jtext,f)
    else:
        loggingcdirectory=os.infogetcwd(")
 skipping compressed  message id {0} with {1} subsets ".format(ident,nsubsets)) oscarFile=os.path.join(cdirectory,"oscar.json")
    
    return obid
    with open(oscarFile,"r") as f:
     

def main():
    args=read_cmd_line()
    logfilejtext=args.logfile 
    logging.basicConfig(filename=logfile,level=logging.INFO,filemode="w"json.load(f)
    
    infile=args.input 
  
  
    outfile=args.output 
   
    mode=args.mode 
    if mode=="web":wigosDf=parse_json_into_dataframe(jtext)
    
    jtext=read_oscar_web(f=open(infile,"rb")
        cdirectory=os.getcwd(nmsg=codes_count_in_file(f)
        oscarFile=os.path.join(cdirectory,"oscar.jsonfout=open(outfile,"wb")
    for i in  with open(oscarFile,"w") as frange(0,nmsg):
        bid=codes_bufr_new_from_file(f)
    json.dump(jtext,f)
    else:obid=codes_clone(bid)
        cdirectory=os.getcwd(codes_set(bid, 'skipExtraKeyAttributes', 1)
        oscarFile=os.path.join(cdirectory,"oscar.json"codes_set(bid,"unpack",1)
        with open(oscarFile,"r") as f:ident=get_ident(bid)
            jtext=json.load(f)
        if   ident:
       
     logging.info (" \t 
message {0} ident  wigosDf=parse_json_into_dataframe(jtext)
{1} ".format(i+1,ident))

    
    f=open(infile,"rb")
    nmsg=codes_count_in_file(f)
odf=wigosDf.query("oldID=='{0}'".format(ident))       fout=open(outfile,"wb")
    for i in range(0,nmsg):
  
      obid=codes_bufr_new_from_samples("BUFR4")
      if  bid=codes_bufr_new_from_file(f)not odf.empty:
        codes_set(bid,"unpack",1)
        ident=get_ident(bidadd_wigos_info(ident,bid, odf,obid)
        if ident:
       codes_write(obid,fout)
     logging.info (" \t message {0} ident {1} ".format(i+1,ident))
   else:
          add_wigos_info(ident,bid, wigosDf, obid)
    logging.info(" wigos {0} is empty for   codes_write(obid,foutident {1}".format(ident,odf["wigosLocalIdentifierCharacter"].values))
    
        else:
            logging.info ("message {0} rejected ".format(i+1))
        codes_release(obid)        
        codes_release(bid)
    f.close()    
   
    print (" finished")


if __name__ == '__main__':
    main()

...

that are uncompressed (compressed = 0) and single subset (numberOfSubsets = 1) if their ident matches the ones in wigosDf.

5) If the get_ident function finds several idents in a message, it returns only the first one.


During program execution a log  file is generated containing information about the processing.

...

  • Only uncompressed messages  (compressed =0) and  single subset (numberOfSubsets=1) are considered
  • The Oscar information retrieved from the web server has to be cleaned for this program to work. This is the goal of the function parse_json_into_dataframe, which uses a regular expression to filter the WIGOS data.
  • When setting the WIGOS information it is important to preserve the data types; for example, "wigosLocalIdentifierCharacter" is a character string.
  • The masterTablesVersionNumber must be at least 28, otherwise no WIGOS ids can be added. The add_wigos_info function takes care of this: for every message processed, a table version number lower than 28 is raised to 28.


Results


The output file contains 22724 messages 19543  SYNOP messages obtained from running the program on a input BUFR file containing raw SYNOP data received through GTS




View file
nameout_synop_wigos.bufr
height250

This file contains 7 TEMP messages obtained running the program on a BUFR file containing raw TEMP messages.

View file
namenewOutputout_temp_wigos.bufr
height250