Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

A quality control of the observations is needed to avoid encoding wrong values in the output BUFR.

Code Block
#!/usr/bin/python
'''
# Copyright 2005-2018 ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction

This is a test program to encode Wigos Synop 
requires 

1) ecCodes version 2.8 or above (available at https://confluence.ecmwf.int/display/ECC/Releases)
2) python2.7

To run the program

   ./test_wigos_NEW.py -a Ascii_csv_file  -b output_Bufr -w WIGOS_TABLE_FILE -l logFile
   
Uses BUFR version 4 template  301150 307091

Author : Roberto Ribas Garcia ECMWF 27 Sep 2018
Modifications
Author : Roberto Ribas Garcia ECMWF 7 Nov 2018
         added new command line option to read the station codes
         changed logic to read station codes and update bufr accordingly
         added logging information 
         Roberto Ribas Garcia, ECMWF 24 Jan 2019 changed the encoding function
         to have one header. 
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!-------------------------------------- CAVEATS ---------------------------
!!!!!!!!!!!!!!!!!!!!!!!!! A QUALITY CHECK IMPLEMENTATION IS NEEDED
!!!!!!!!!!!!!!!!!!!!!!!!! IT MAY BE INCOMPLETE CHECK AND TEST CAREFULLY
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
'''
import sys
#sys.path.append('/usr/local/lib64/python2.7/site-packages/')
# adapt the path to your installation
sys.path.append("/usr/local/apps/eccodes/2.10.0/GNU/6.3.0/lib/python2.7/site-packages/")

import argparse 
#import ipdb
import pandas as pd
import numpy as np
import logging 
from datetime import datetime
from eccodes import (codes_set, codes_set_array, codes_bufr_new_from_samples,codes_write,codes_get_api_version, 
                     codes_release, CodesInternalError,CODES_MISSING_DOUBLE,CODES_MISSING_LONG) 

    
def read_cmdline():
    '''
    reads the command line to get the input ascii filename and the output bufr file
        usage 
            prog  -a Ascii_input_file  -b Bufr_output_file -l logfile -w WIGOS_TABLE_FILE
    '''
    p = argparse.ArgumentParser()
    p.add_argument('-a', '--ascii', help = 'input Ascii filename')
    p.add_argument('-b', '--bufr', help = 'output Bufr filename')
    p.add_argument("-w", "--wigoscodes", help = "csv with the station codes")
    p.add_argument("-l","--logging", help = "log file")
    args = p.parse_args()
    return args



                

def bufr_encode_new(dfMeteo,dfEst,fout):
    '''
    encodes the bufr file with one BUFR header and loops over the 
    the dfMeteo dataframe. 
    
    dfEst    : is the station WiGOS info read from the WIGOS_TABLE_FILE
    dfMeteo  : contains the meteo information 
    fout     : is an file object opened for writing in the main program 
    
    '''
    
    ibufr = codes_bufr_new_from_samples('BUFR4')
   
    nSubsets=len(dfMeteo.index)
    oneSubsetDelayed=np.array([0, 0, 0, 0, 1, 1, 0, 0, 1, 1,1, 1, 0, 0,],dtype=np.int)
    DelayedDesc=np.tile(oneSubsetDelayed,nSubsets)
    ### this DelayedDesc array is a replication of the oneSubsetDelayed array as many
    ### times as subsets are present in the message. The number of subsets is found
    ### as the number of lines of the dfMeteo dataframe
    ### this is used to populate the key inputShortDelayedDescriptorReplicationFactor 
    codes_set_array(ibufr, 'inputShortDelayedDescriptorReplicationFactor', DelayedDesc)
    codes_set(ibufr, 'edition', 4)
    codes_set(ibufr, 'masterTableNumber', 0)
    codes_set(ibufr, 'bufrHeaderCentre', 43)
    codes_set(ibufr, 'bufrHeaderSubCentre', 0)
    codes_set(ibufr, 'updateSequenceNumber', 0)
    codes_set(ibufr, 'dataCategory', 0)
    codes_set(ibufr, 'internationalDataSubCategory', 7)
    codes_set(ibufr, 'dataSubCategory', 7)
    codes_set(ibufr, 'masterTablesVersionNumber', 28)
    codes_set(ibufr, 'localTablesVersionNumber', 0)
    ### check if this suits your system
    procTime=datetime.now()
    codes_set(ibufr, 'typicalYear', procTime.year)
    codes_set(ibufr, 'typicalMonth', procTime.hour)
    codes_set(ibufr, 'typicalDay', procTime.day)
    codes_set(ibufr, 'typicalHour', procTime.hour)
    codes_set(ibufr, 'typicalMinute', procTime.minute)
    codes_set(ibufr, 'typicalSecond', procTime.second)
    codes_set(ibufr, 'numberOfSubsets', nSubsets)
    codes_set(ibufr, 'observedData', 1)
    codes_set(ibufr, 'compressedData', 0)
    ivalues = ( 301150, 307091,)
    # Create the structure of the data section
    codes_set_array(ibufr, 'unexpandedDescriptors', ivalues)
    ## these are labels to update the different keys  MAY NEED MORE LABELS
    lblHOS1=1
    lblHOS2=1
    lblHOSMP=1
    lblTS=1
    lblTP=1
    ### loops over the dfMeteo 
    nValidMsg=0
    nWrongMsg=0
    for imsg,row in dfMeteo.iterrows():
        stationID=row["station"].strip()
        dfEstInfo=dfEst[dfEst["CODIGO"]==stationID]
        if not dfEstInfo.empty:
            nValidMsg+=1
            key="#{0}#wigosIdentifierSeries".format(imsg+1)
          
            codes_set(ibufr, key,0 )
            key="#{0}#wigosIssuerOfIdentifier".format(imsg+1)
            codes_set(ibufr, key, 76)
            key="#{0}#wigosIssueNumber".format(imsg+1)
            codes_set(ibufr, key, 0)
            
            wigosLocalID=dfEstInfo["WIGOS"].values[0]
            WigosCode=dfEstInfo["WIGOS"].values[0]
            wigosIdChar=WigosCode.split("-")[-1]
            key='#{0}#wigosLocalIdentifierCharacter'.format(imsg+1)
            codes_set(ibufr,key ,wigosIdChar)
            
            key="#{0}#stateIdentifier".format(imsg+1)
            codes_set(ibufr, key, CODES_MISSING_LONG)
            key='#{0}#nationalStationNumber'.format(imsg+1)
            codes_set(ibufr,key , CODES_MISSING_LONG)
            block=dfEstInfo["BLOCK"].values[0]
            key='#{0}#blockNumber'.format(imsg+1)
            codes_set(ibufr, key, block)
            stationNumber=dfEstInfo["STATION"].values[0]
            key='#{0}#stationNumber'.format(imsg+1)
            codes_set(ibufr, key, stationNumber)
            stationName=dfEstInfo["NOME"].values[0]
            key='#{0}#stationOrSiteName'.format(imsg+1)
            codes_set(ibufr,key ,stationName)
            key='#{0}#stationType'.format(imsg+1)
            codes_set(ibufr,key , CODES_MISSING_LONG)
            
            key="#{0}#year".format(imsg+1)
            codes_set(ibufr,key , row["year"])
            key='#{0}#month'.format(imsg+1)
            codes_set(ibufr, key, row["month"])
            key="#{0}#day".format(imsg+1)
            codes_set(ibufr, key, row["day"])
            key="#{0}#hour".format(imsg+1)
            codes_set(ibufr, key, row["ObsHour"])
            key="#{0}#minute".format(imsg+1)
            codes_set(ibufr, key, 0)
            
            stationLat=dfEstInfo["LATITUDE"].values[0]
            stationLon=dfEstInfo["LONGITUDE"].values[0]
            stationAltitude=dfEstInfo["ALTITUDE"].values[0]
            key="#{0}#latitude".format(imsg+1)
            codes_set(ibufr,key,stationLat)
            key="#{0}#longitude".format(imsg+1)
            codes_set(ibufr,key,stationLon)
            key="#{0}#heightOfStationGroundAboveMeanSeaLevel".format(lblHOS1)
            codes_set(ibufr,key,stationAltitude)
            lblHOS1+=1
            altitude = float(stationAltitude) + 1.5
            key="#{0}#heightOfBarometerAboveMeanSeaLevel".format(lblHOS2)
            codes_set(ibufr,key,altitude)
            lblHOS2+=1
            key="#{0}#surfaceQualifierForTemperatureData".format(imsg+1)
            codes_set(ibufr,key,3)
            key='#{0}#mainPresentWeatherDetectingSystem'.format(imsg+1)
            codes_set(ibufr, key, CODES_MISSING_LONG)
            key='#{0}#supplementaryPresentWeatherSensor'.format(imsg+1)
            codes_set(ibufr, key, CODES_MISSING_LONG)
            key='#{0}#visibilityMeasurementSystem'.format(imsg+1)
            codes_set(ibufr,key , CODES_MISSING_LONG)
            key='#{0}#cloudDetectionSystem'.format(imsg+1)
            codes_set(ibufr,key, CODES_MISSING_LONG)
            key='#{0}#lightningDetectionSensorType'.format(imsg+1)
            codes_set(ibufr,key , CODES_MISSING_LONG)
            key='#{0}#skyConditionAlgorithmType'.format(imsg+1)
            codes_set(ibufr,key , CODES_MISSING_LONG)
            key='#{0}#capabilityToDetectPrecipitationPhenomena'.format(imsg+1)
            codes_set(ibufr,key , CODES_MISSING_LONG)
            key='#{0}#capabilityToDetectOtherWeatherPhenomena'.format(imsg+1)
            codes_set(ibufr,key , CODES_MISSING_LONG)
            key='#{0}#capabilityToDetectObscuration'.format(imsg+1)
            codes_set(ibufr,key , CODES_MISSING_LONG)
            key='#{0}#capabilityToDiscriminateLightningStrikes'.format(imsg+1)
            codes_set(ibufr, key, CODES_MISSING_LONG)
            key="#{0}#nonCoordinatePressure".format(imsg+1)
            codes_set(ibufr,key,CODES_MISSING_DOUBLE)
            pressure=row["PresInst"]
            pressure = float(pressure) * 100
            key="#{0}#pressureReducedToMeanSeaLevel".format(imsg+1)
            codes_set(ibufr,key,pressure)
            key="#{0}#3HourPressureChange".format(imsg+1)
            codes_set(ibufr,key,CODES_MISSING_DOUBLE)
            key="#{0}#characteristicOfPressureTendency".format(imsg+1)
            codes_set(ibufr,key,2)
            key="#{0}#pressure".format(imsg+1)
            codes_set(ibufr, key, CODES_MISSING_DOUBLE)
            key='#{0}#nonCoordinateGeopotentialHeight'.format(imsg+1)
            codes_set(ibufr, key, CODES_MISSING_LONG)
            key='#{0}#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform'.format(lblHOSMP)
            codes_set(ibufr,key ,CODES_MISSING_DOUBLE)
            lblHOSMP+=1
            airTInst=row["airTinst"]
            if airTInst!=CODES_MISSING_DOUBLE:
                temperature = airTInst + 273.15
            else:
                temperature=airTInst
            key="#{0}#airTemperature".format(imsg+1)
            codes_set(ibufr,key,temperature)
            dewPInst=row["dewPInst"]
            dewPInst=dewPInst+273.15
            key="#{0}#dewpointTemperature".format(imsg+1)
            codes_set(ibufr,key,dewPInst)
            relHinst=row["relHinst"]
            key="#{0}#relativeHumidity".format(imsg+1)
            codes_set(ibufr,key , relHinst)
            key="#{0}#timeSignificance".format(lblTS)
            codes_set(ibufr, key, 2)
            lblTS+=1
            key="#{0}#timePeriod".format(lblTP)
            codes_set(ibufr, key, -10)
            lblTP+=1
            windDir=row["WindDir"]
            windSpeed=row['WindSpeed']
            key="#{0}#windDirection".format(imsg+1)
            codes_set(ibufr,key, windDir)
            key='#{0}#windSpeed'.format(imsg+1)
            codes_set(ibufr,key , windSpeed)
            
            key="#{0}#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform".format(lblHOSMP)
            codes_set(ibufr,key,10)
            
            key="#{0}#timePeriod".format(lblTP)
            codes_set(ibufr,key,-10)
            lblTP+=1
            key="#{0}#timePeriod".format(lblTP)
            codes_set(ibufr,key,-60)
            key='#{0}#totalPrecipitationOrTotalWaterEquivalent'.format(imsg+1)
            precip=row["Precip"]
            codes_set(ibufr,key , precip)
            
            key="#{0}#netRadiationIntegratedOverPeriodSpecified".format(imsg+1)
            rad=row["Rad"]
            codes_set(ibufr,key,rad)
        else:
            logging.info("Station {0} not found in the WIGOS TABLE FILE".format(stationID))
            nWrongMsg+=1
            

#######################################################             
###### at the end pack, write and release ibufr
    codes_set(ibufr, 'pack', 1)
    codes_write(ibufr, fout)
    codes_release(ibufr)
    return nValidMsg,nWrongMsg

    

def read_ascii(inputFilename):
    '''
    function to read the Ascii data into a pandas dataframe, 
        args:
            inputFilename : full path of the Ascii file for example /tmp/data/rema_20180918.txt
     
    uses white spaces as column delimiters
    index_col = False avoids using the first column (Station) as index
    names is the list of names from the excel it can be changed but this affects the dataframe
    returns the pandas dataframe dfM (for Measurement)
    '''
    dfM = pd.read_csv(inputFilename,
                     header = None, 
                     index_col = False,
                     delim_whitespace = True,
                     names = ['station', 'year', 'month', 'day', 'ObsHour', 'TensBat', 'TempCpu', 'airTinst',
                              'airTmax', 'airTmin', 'relHinst', 'relHmax', 'relHmin', 'dewPInst', 'dewPmax',
                              'dewPmin', 'PresInst', 'PresMax', 'PresMin', 'WindSpeed', 'WindDir', 'WindGust',
                              'Rad', 'Precip', 'CloudCoverTot', 'CloudCODE', 'CloudBase', 'Visib'])
    cols = dfM.columns.drop('station')
    dfM[cols] = dfM[cols].apply(pd.to_numeric, errors = 'coerce')
    return dfM


# column  1 = station    column 2 = year     column  3 = month     column 4 = day       column  5 = ObsHour  column  6 = TensBat  column 7 = TempCpu
# column  8 = airTinst   column 9 = airTmax  column 10 = airTmin  column 11 = relHinst  column 12 = relHmax  column 13 = relHmin
# column 14 = dewPInst  column 15 = dewPmax  column 16 = dewPmin  column 17 = PresInst  column 18 = PresMax  column 19 = PresMin
# column 20 = WindSpeed column 21 = WindDir  column 22 = WindGust column 23 = Rad       column 24 = Precip   column 25 = CloudCoverTot
# column 26 = CloudCODE column 27 = CloudBase column 28 = Visib
#
#     
# column 1 = station  column 2 = Name  column 3 = Latitude  column  4 = Longitude  column 5 = Elevation  column 6 = UF
# column 7 = Region   column 8 = Date  column 9 = ID_WIGOS  column 10 = status





def read_estacoes_data(StationCodeFile):
    '''
    reads the Stations Code File ( with information such as name,lat, lon,alt, WigosId etc)
    the name is read through the command line argument -c
    '''
    dfEst = pd.read_csv(StationCodeFile,
                        header = 0,
                        index_col = False)
    return dfEst



def main():
    '''
    main program reads the command line and encodes the messages into the output filename
        to run the program 
            program_name.py -a Ascii_input_file -b Bufr_output_file -w WIGOS_TABLE_FILE -l logFile 
    '''
    cmdLine = read_cmdline()
    inputFilename = cmdLine.ascii
##### changed this to make it work, change back according to your requirements
    # FileHeader
    #data_hora = inputFilename.split('_')
    #data_hora = data_hora[1].split('.aut')
    #data_hora = data_hora[0]
    #data_hora = list(data_hora)
    #dia = data_hora[6]  + data_hora[7]
    #hora = data_hora[8]  + data_hora
    dia="20"
    hora="12"
    FileHeader = 'ISAI99 SBBR ' + dia + hora + '00\r\r\n'

    outFilename = cmdLine.bufr 
    stationFile=cmdLine.wigoscodes
    logFile = cmdLine.logging 
    logging.basicConfig(filename=logFile,format = "%(asctime)s %(levelname)s:%(message)s ",level=logging.DEBUG)
    logging.info("Using ecCodes version {0}".format(codes_get_api_version()))
    fout = open(outFilename, 'w')
    fout.write(FileHeader)
    
    dfEstacoes = read_estacoes_data(stationFile)

    dfMeteo=read_ascii(inputFilename)
  

    nValidMsg,nWrongMsg=bufr_encode_new(dfMeteo,dfEstacoes,fout)
                        
    fout.close()
    logging.info( "number of valid Messages {0}  wrong messages {1}".format(nValidMsg,nWrongMsg))
    logging.info(' output file {0}'.format(outFilename))
    
if __name__ == '__main__':
    main()



Changed the bufr structure at request from Brazil. Now, there is only one bufr header and the different stations appear as subsets of the message.

The new program is not complete. Some work is requiered on the user side to fully adapt to the bufr structure and perhaps simplify the bufr_encode_new function.


DISCLAIMER: This software is intended for testing purposes only.  Feedback would be appreciated but technical support can only be given if our workload permits.