Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  • Area cropping
  • Time series extraction 

There are different ways two scenarios to perform those operations:

  • From Remotely - Using the CDS API (less data is downloaded)
  • Locally (full control on the process)

Table of Contents

GloFAS

CDS API

Time series extraction:

  • to perform the operation remotely on the CDS compute nodes and retrieve only the reduced data.
  • Locally - Using the CDS API to retrieve the entire data and perform the operation locally.


Table of Contents

Settings for 


For the exercise on extracting time series on the local machine, we are going to use the latitude and longitude coordinates from a tiny subset of the GRDC dataset.

Copy the content of the box below into an empty file named "GRDC.csv", the file should reside in your working folder.


Code Block
titleGRDC dataset
collapsetrue
grdc_no,wmo_reg,sub_reg,river,station,country,lat,long,area,altitude
6118010,6,18,CLAIE,SAINT-JEAN-BREVELAY,FR,47.824831,-2.703308,137.0,99.98
6118015,6,18,YVEL,LOYAT (PONT D129),FR,47.993815,-2.368849,315.0,88.26
6118020,6,18,"ARON, RUISSEAU D'",GRAND-FOUGERAY (LA BERNADAISE),FR,47.71222,-1.690835,118.0,68.0
6118025,6,18,"CANUT, RUISSEAU DE",SAINT-JUST (LA RIVIERE COLOMBEL),FR,47.775309,-1.979609,37.0,80.22
6118030,6,18,AFF,PAIMPONT (PONT DU SECRET),FR,47.981631,-2.143762,30.2,119.01
6118050,6,18,COET-ORGAN,QUISTINIC (KERDEC),FR,47.904164,-3.201265,47.7,94.42
6118060,6,18,EVEL,GUENIN,FR,47.899928,-2.975167,316.0,95.16
6118070,6,18,STER-GOZ,BANNALEC (PONT MEYA),FR,47.906833,-3.752172,69.7,85.08
6118080,6,18,MOROS,CONCARNEAU (PONT D22),FR,47.882934,-3.875375,20


Retrieve the following datasets:

Code Block
languagepy
titleGet the GloFAS Historical data
collapsetrue
import cdsapi

c = cdsapi.Client()

c.retrieve(
    'cems-glofas-historical',
    {
        '
Code Block
languagepy
titleExtract timeseries: GloFAS Medium-range Reforecast
collapsetrue
import cdsapi
from datetime import datetime, timedelta



def get_monthsdays(start =[2019,1,1],end=[2019,12,31]):
# reforecast time index
start, end = datetime(*start),datetime(*end)
days = [start + timedelta(days=i) for i in range((end - start).days + 1)]
monthday = [d.strftime("%B-%d").split("-") for d in days if d.weekday() in [0,3] ] 

return monthday



if __name__ == '__main__':
c = cdsapi.Client()


# station coordinates (lat,lon)
COORDS = {
"Thames":[51.35,-0.45]
}

# select date index corresponding to the event 
MONTHSDAYS = get_monthsdays(start =[2019,7,11],end=[2019,7,11])

YEAR = '2007' 

LEADTIMES = ['%d'%(l) for l in range(24,1128,24)]

# loop over date index (just 1 in this case)
for md in MONTHSDAYS:

month = md[0].lower()
day = md[1]

# loop over station coordinates
for station in COORDS:

station_point_coord = COORDS[station]*2 # coordinates input for the area keyword

c.retrieve(
'cems-glofas-reforecast',
{
'system_version': 'version_2_2',
'variable': 'river_discharge_in_the_last_24_hours',
        'format': 'grib',
        'hydrological_model': 'htessel_lisflood',
        'product_type': ['control_reforecastintermediate','ensemble_perturbed_reforecasts'],
'area':station_point_coord, 
'hyear': YEAR,

        'hyear': '2021',
        'hmonth': month 'january',
        'hday': day ,
'leadtime_hour': LEADTIMES,
},
f'glofas_reforecast_{station}_{month}_{day}.grib')

Area cropping:

Code Block
languagepy
titleArea cropping: GloFAS Medium-range reforecast 
collapsetrue
## === retrieve GloFAS Medium-Range Reforecast ===

## === subset India, Pakistan, Nepal and Bangladesh region === 


import cdsapi
from datetime import datetime, timedelta


def get_monthsdays():

    start, end = datetime(2019, 1, 1), datetime(2019, 12, 31)
    days = [start + timedelta(days=i) for i in range((end - start).days + 1)]
    monthday = [d.strftime("%B-%d").split("-")  for d in days if d.weekday() in [0,3] ]   

    return monthday

MONTHSDAYS = get_monthsdays()

if __name__ == '__main__':
    c = cdsapi.Client()
    
    # user inputs
	BBOX = [40.05 ,59.95, 4.95, 95.05] # North West South East
    YEARS  = ['%d'%(y) for y in range(1999,2019)]
    LEADTIMES = ['%d'%(l) for l in range(24,1128,24)]
    
    # submit request
    for md in MONTHSDAYS:
 [
            '01', '02', '03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',
            '13', '14', '15',
            '16', '17', '18',
            '19', '20', '21',
            '22', '23', '24',
            '25', '26', '27',
            '28', '29', '30',
            '31',
        month = md[0].lower()],
        day = md[1]

'system_version': 'version_3_1',
        c.retrieve(
        },
    'cems-glofas-reforecast',
  glofas_historical.grib')


Code Block
titleGet the EFAS reforecast data
collapsetrue
import cdsapi

c = cdsapi.Client()

c.retrieve(
     'efas-reforecast',
     {
        'format': 'grib',
        'systemproduct_versiontype': 'versionensemble_2perturbed_2reforecasts',
                'variable': 'river_discharge_in_the_last_246_hours',
          'model_levels': 'surface_level',
        'formathyear': 'grib2007',
        'hmonth': 'march',
        'hydrological_modelhday': 'htessel_lisflood',[
                'product_type': 'control_reforecast',
				'area': BBOX,# < - subset'04', '07',
        ],
        'hyearleadtime_hour': YEARS,[
                'hmonth': month ,
                'hday': day ,
  '0', '12', '18',
              'leadtime_hour': LEADTIMES6',
        ],
    },
            f'glofasefas_reforecast_{month}_{day}.grib')

GloFAS

CDS API

Time series extraction:


Code Block
languagepy
titleArea croppingExtract timeseries: GloFAS Seasonal ForecastMedium-range Reforecast
collapsetrue
## === retrieve GloFAS Seasonal Forecast ===

## === subset South America/Amazon region === 

import cdsapi


if __name__ == '__main__':
    c = cdsapi.Client()

    YEARS  = ['%d'%(y) for y in range(2020,2022)]


    MONTHS = ['%02d'%(m) for m in range(1,13)]

    LEADTIMES = ['%d'%(l) for l in range(24,2976,24)]
    
    for year in YEARS:

        for month in MONTHS:
            
            c.retrieve(
                'cems-glofas-seasonal',
                {   
                import cdsapi
from datetime import datetime, timedelta



def get_monthsdays(start =[2019,1,1],end=[2019,12,31]):
# reforecast time index
start, end = datetime(*start),datetime(*end)
days = [start + timedelta(days=i) for i in range((end - start).days + 1)]
monthday = [d.strftime("%B-%d").split("-") for d in days if d.weekday() in [0,3] ] 

return monthday



if __name__ == '__main__':
c = cdsapi.Client()


# station coordinates (lat,lon)
COORDS = {
"Thames":[51.35,-0.45]
}

# select date index corresponding to the event 
MONTHSDAYS = get_monthsdays(start =[2019,7,11],end=[2019,7,11])

YEAR = '2007' 

LEADTIMES = ['%d'%(l) for l in range(24,1128,24)]

# loop over date index (just 1 in this case)
for md in MONTHSDAYS:

month = md[0].lower()
day = md[1]

# loop over station coordinates
for station in COORDS:

station_point_coord = COORDS[station]*2 # coordinates input for the area keyword

c.retrieve(
'cems-glofas-reforecast',
{
'system_version': 'version_2_2',
'variable': 'river_discharge_in_the_last_24_hours',
                'format': 'grib',
                'year'hydrological_model': year'htessel_lisflood',
                'month': '12' if year == '2020' else month,
                'product_type': ['control_reforecast','ensemble_perturbed_reforecasts'],
'area':station_point_coord, 
'hyear': YEAR,
'hmonth': month ,
'hday': day ,
'leadtime_hour': LEADTIMES,
				'area': [ 10.95, -90.95, -30.95, -29.95 ]

                },
                },
f'glofas_seasonalreforecast_{yearstation}_{month}_{day}.grib')

Area cropping:


Code Block
languagepy
titleArea cropping: GloFAS Seasonal ReforecastMedium-range reforecast 
collapsetrue
## === retrieve GloFAS SeasonalMedium-Range Reforecast ===

## === subset South America/AmazonIndia, Pakistan, Nepal and Bangladesh region === 


import cdsapi

if __name__ == '__main__':from datetime import datetime, timedelta


    c = cdsapi.Clientdef get_monthsdays():

    YEARSstart, end = ['%d'%(y) for y in range(1981,2021)]
datetime(2019, 1, 1), datetime(2019, 12, 31)
    MONTHSdays = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']

    LEADTIMES = ['%d'%(l) for l in range(24,2976,24)]
    
    for year in YEARS:
        for month in MONTHS:

            c.retrieve(
                'cems-glofas-seasonal-reforecast',
                {
                    'system_version': 'version_2_2',
  start + timedelta(days=i) for i in range((end - start).days + 1)]
    monthday = [d.strftime("%B-%d").split("-")  for d in days if d.weekday() in [0,3] ]   

    return monthday

MONTHSDAYS = get_monthsdays()

if __name__ == '__main__':
    c = cdsapi.Client()
    
    # user inputs
	BBOX = [40.05 ,59.95, 4.95, 95.05] # North West South East
    YEARS  = ['%d'%(y) for y in range(1999,2019)]
    LEADTIMES = ['%d'%(l) for l in range(24,1128,24)]
    
    # submit request
    for md in MONTHSDAYS:

        month = md[0].lower()
        day = md[1]

        'variable':'river_discharge_in_the_last_24_hours',c.retrieve(
            'cems-glofas-reforecast',
        'format':'grib',
    {
                'hydrologicalsystem_modelversion': 'htesselversion_2_lisflood2',
                    'hyearvariable': year'river_discharge_in_the_last_24_hours',
                    'hmonthformat': month'grib',
                    'leadtimehydrological_hourmodel': LEADTIMES'htessel_lisflood',
                    'product_type': 'control_reforecast',
				'area': [ 10.95BBOX,# -90.95,< -30.95, -29.95 ]
 subset
                'hyear': }YEARS, 
                f'glofas_seasonal_reforecast_{year}_{month}.grib')

Local machine

Code Block
languagepy
titleGet the GloFAS Historical data
collapsetrue
import cdsapi

c = cdsapi.Client()

c.retrieve(
'hmonth': month ,
                'cems-glofas-historical'hday': day ,
    {
           'variable': 'river_discharge_in_the_last_24_hours',
        'format': 'grib'leadtime_hour': LEADTIMES,
        'hydrological_model': 'lisflood',
        'product_type': 'intermediate'},
        'hyear': '2021',
      f'glofas_reforecast_{month}_{day}.grib')


Code Block
languagepy
titleArea cropping: GloFAS Seasonal Forecast
collapsetrue
## === retrieve GloFAS Seasonal Forecast ===

## === subset South America/Amazon region === 

import cdsapi


if __name__ == '__main__':
    c = cdsapi.Client()

    YEARS  = ['%d'%(y) for y in range(2020,2022)]


    MONTHS = ['%02d'%(m) for m in range(1,13)]

    LEADTIMES = ['%d'%(l) for l in range(24,2976,24)]
    
    for year in YEARS:

        for month in MONTHS:
  'hmonth': 'january',
        'hday': [
            '01', '02', '03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',
            '13', '14', '15',
   
         '16', '17', '18',   c.retrieve(
            '19', '20', '21    'cems-glofas-seasonal',
               '22', '23', '24', {   
            '25',    '26variable',: '27river_discharge_in_the_last_24_hours',
                '28format',: '29grib', '30',

                '31year': year,
        ],
        'system_versionmonth': 'version_3_1',
    }12' if year == '2020' else month,
    'glofas_historical.grib')

Time series extraction:

Code Block
languagepy
titleScript
collapsetrue
import xarray as xr
import pandas as pd

parameter = "dis24"

ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib",backend_kwargs={'time_dims':['time']})
df = pd.read_csv("GRDC.csv")

total = len(df)

rows = []
count = 0
for lon, lat, id in zip(df.long, df.lat, df.grdc_no):
    extracted = ds.sel(longitude=lon, latitude=lat, method="nearest")[parameter]
    df_temp = extracted.drop_vars(["surface"]).to_dataframe().reset_index()
    df_temp["grdc"] = str(id)
    df_temp = df_temp.set_index(["grdc", "time"])
    rows.append(df_temp)
    count += 1
    print(f"progress: {count/total*100} %")

out = pd.concat(rows)
out.to_csv("extracted.csv", index="grdc")

Area cropping:

Code Block
languagepy
titleScript
collapsetrue
import xarray as xr

# Rhine's basin bounding box
bbox = [50.972204, 46.296530, 5.450796, 11.871059]  # N,S,W,E

ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib")

ds_cropped = ds.sel(
    longitude=slice(bbox[2], bbox[3]), latitude=slice(bbox[0], bbox[1])
)

ds_cropped.to_netcdf("glofas_historical_cropped.nc")

EFAS

Warning
titleCoordinates precision

When transforming from lat/lon (source coordinates) to projected LAEA (target coordinates), you need to consider that the number of decimal places of the source coordinates affects the target coordinates precision:

An interval of 0.001 degrees corresponds to about 100 metres in LAEA.

An interval of 0.00001 degrees corresponds to about 1 metre in LAEA.

CDS API

to update once cropping works....

Time series extraction:

Area cropping:

Local machine

            'leadtime_hour': LEADTIMES,
				'area': [ 10.95, -90.95, -30.95, -29.95 ]

                },
                f'glofas_seasonal_{year}_{month}.grib')


Code Block
languagepy
titleArea cropping: GloFAS Seasonal Reforecast
collapsetrue
## === retrieve GloFAS Seasonal Reforecast ===

## === subset South America/Amazon region === 

import cdsapi

if __name__ == '__main__':


    c = cdsapi.Client()

    YEARS  = ['%d'%(y) for y in range(1981,2021)]

    MONTHS = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']

    LEADTIMES = ['%d'%(l) for l in range(24,2976,24)]
    
    for year in YEARS:
        for month in MONTHS:

            c.retrieve(
                'cems-glofas-seasonal-reforecast',
                {
                    'system_version': 'version_2_2',
                    'variable':'river_discharge_in_the_last_24_hours',
                    'format':'grib',
            
Code Block
titleGet the EFAS reforecast data
collapsetrue
import cdsapi

c = cdsapi.Client()

c.retrieve(
    'efas-reforecast',
    {
        'formathydrological_model': 'gribhtessel_lisflood',
                 'product_type': 'ensemble_perturbed_reforecasts',
   'hyear': year,
                    'variablehmonth': 'river_discharge_in_the_last_6_hours'month,
        'model_levels            'leadtime_hour': 'surface_level'LEADTIMES,
        'hyear': '2007',
            'hmontharea': 'march',
     [ 10.95, -90.95, -30.95, -29.95 ]
    'hday': [
            '04'}, '07',
        ],
        'leadtime_hour': [
            '0', '12', '18',
            '6',
        ],
    },
    'efas_reforecast.grib')

Time series extraction:

We are going to extract EFAS reforecast's timeseries at locations defined by latitude and longitude coordinates from a tiny subset of the GRDC dataset.

Info
titleImportant - Download upstream area

EFAS's x and  y coordinates, when converted from GRIB to NetCDF, are not projected coordinates but matrix indexes (i, j), It is necessary to download the upstream area static file that contains the projected coordinates and replace it in EFAS.

Copy the content into an empty file named "GRDC.csv", the file should reside in the same folder of the efas_reforecast.grib file and the upstream area.

f'glofas_seasonal_reforecast_{year}_{month}.grib')


Local machine



Time series extraction:


Code Block
languagepy
titleScript
collapsetrue
import xarray as xr
import pandas as pd

parameter = "dis24"

ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib",backend_kwargs={'time_dims':['time']})
df = pd.read_csv("GRDC.csv")

total = len(df)

rows = []
count = 0
for lon, lat, id in zip(df.long, df.lat, df.grdc_no):
    extracted = ds.sel(longitude=lon, latitude=lat, method="nearest")[parameter]
    df_temp = extracted.drop_vars(["surface"]).to_dataframe().reset_index()
    df_temp["grdc"] = str(id)
    df_temp = df_temp.set_index(["grdc", "time"])
    rows.append(df_temp)
    count += 1
    print(f"progress: {count/total*100} %")

out = pd.concat(rows)
out.to_csv("extracted.csv", index="grdc")


Area cropping:


Code Block
languagepy
titleScript
collapsetrue
import xarray as xr

# Rhine's basin bounding box
bbox = [50.972204, 46.296530, 5.450796, 11.871059]  # N,S,W,E

ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib")

ds_cropped = ds.sel(
    longitude=slice(bbox[2], bbox[3]), latitude=slice(bbox[0], bbox[1])
)

ds_cropped.to_netcdf("glofas_historical_cropped.nc")

EFAS


Warning
titleCoordinates precision

When transforming from lat/lon (source coordinates) to projected LAEA (target coordinates), you need to consider that the number of decimal places of the source coordinates affects the target coordinates precision:

An interval of 0.001 degrees corresponds to about 100 metres in LAEA.

An interval of 0.00001 degrees corresponds to about 1 metre in LAEA.

CDS API

to update once cropping works....

Time series extraction:


Area cropping:

Local machine


Time series extraction:

We are going to extract EFAS reforecast's timeseries at locations defined by latitude and longitude coordinates from a tiny subset of the GRDC dataset.

Info
titleImportant - Download upstream area

EFAS's x and  y coordinates, when converted from GRIB to NetCDF, are not projected coordinates but matrix indexes (i, j), It is necessary to download the upstream area static file that contains the projected coordinates and replace it in EFAS.

Code Block
titleGRDC dataset
collapsetrue
grdc_no,wmo_reg,sub_reg,river,station,country,lat,long,area,altitude 6118010,6,18,CLAIE,SAINT-JEAN-BREVELAY,FR,47.824831,-2.703308,137.0,99.98 6118015,6,18,YVEL,LOYAT (PONT D129),FR,47.993815,-2.368849,315.0,88.26 6118020,6,18,"ARON, RUISSEAU D'",GRAND-FOUGERAY (LA BERNADAISE),FR,47.71222,-1.690835,118.0,68.0 6118025,6,18,"CANUT, RUISSEAU DE",SAINT-JUST (LA RIVIERE COLOMBEL),FR,47.775309,-1.979609,37.0,80.22 6118030,6,18,AFF,PAIMPONT (PONT DU SECRET),FR,47.981631,-2.143762,30.2,119.01 6118050,6,18,COET-ORGAN,QUISTINIC (KERDEC),FR,47.904164,-3.201265,47.7,94.42 6118060,6,18,EVEL,GUENIN,FR,47.899928,-2.975167,316.0,95.16 6118070,6,18,STER-GOZ,BANNALEC (PONT MEYA),FR,47.906833,-3.752172,69.7,85.08 6118080,6,18,MOROS,CONCARNEAU (PONT D22),FR,47.882934,-3.875375,20



Code Block
languagepy
titleScript
collapsetrue
import xarray as xr
import pandas as pd
from pyproj import Transformer,CRS

parameter = "dis06"

ds = xr.open_dataset("efas_reforecast.grib", engine="cfgrib")
df = pd.read_csv("GRDC.csv")
uparea = xr.open_dataset("ec_uparea4.0.nc") # the upstream area

# replace x, y
ds["x"] = uparea["x"]
ds["y"] = uparea["y"]

# define reprojection parameters
laea_proj = CRS.from_proj4("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs")
transformer = Transformer.from_crs('epsg:4326', laea_proj, always_xy=True)

total = len(df)

rows = []
count = 0
for lon, lat, id in zip(df.long, df.lat, df.grdc_no):
    x1, y1 = transformer.transform(lon, lat)
    extracted = ds.sel(x=x1, y=y1, number = 1, method="nearest")[parameter]
    df_temp = extracted.drop_vars(["surface", "number"]).to_dataframe().reset_index()
    df_temp["grdc"] = str(id)
    df_temp = df_temp.drop("step", axis=1)
    df_temp = df_temp.set_index(["grdc","time"])
    rows.append(df_temp)
    count += 1
    print(f"progress: {count/total*100} %")

out = pd.concat(rows)
out.to_csv("extracted.csv", index="grdc")

...