Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

For example, when comparing modelled river flow against observations, it is reasonable to being be able to extract the timeseries time-series at those point coordinates rather than dealing with many GB of data. Similarly, when focusing on a specific catchment it is likely that you want to deal with only that part of the spatial domain.

In summary, there are two operations of data size reduction that are very popular on CEMS-Flood datasets, area cropping and time-series extraction.

There are two ways to perform those operations:

...

Code Block
titleGet the EFAS reforecast data
collapsetrue
import cdsapi

c = cdsapi.Client()

c.retrieve(
    'efas-reforecast',
    {
        'format': 'grib',
        'product_type': 'ensemble_perturbed_reforecasts',
        'variable': 'river_discharge_in_the_last_6_hours',
        'model_levels': 'surface_level',
        'hyear': '2007',
        'hmonth': 'march',
        'hday': [
            '04', '07',
        ],
        'leadtime_hour': [
            '0', '12', '18',
            '6',
        ],
    },
    'efas_reforecast.grib')

GloFAS

Remote processing

Time series extraction:

Code Block
languagepy
titleExtract timeseries: GloFAS Medium-range Reforecast
collapsetrue
import cdsapi
from datetime import datetime, timedelta



def get_monthsdays(start =[2019,1,1],end=[2019,12,31]):
# reforecast time index
start, end = datetime(*start),datetime(*end)
days = [start + timedelta(days=i) for i in range((end - start).days + 1)]
monthday = [d.strftime("%B-%d").split("-") for d in days if d.weekday() in [0,3] ] 

return monthday



if __name__ == '__main__':
c = cdsapi.Client()


# station coordinates (lat,lon)
COORDS = {
"Thames":[51.35,-0.45]
}

# select date index corresponding to the event 
MONTHSDAYS = get_monthsdays(start =[2019,7,11],end=[2019,7,11])

YEAR = '2007' 

LEADTIMES = ['%d'%(l) for l in range(24,1128,24)]

# loop over date index (just 1 in this case)
for md in MONTHSDAYS:

month = md[0].lower()
day = md[1]

# loop over station coordinates
for station in COORDS:

station_point_coord = COORDS[station]*2 # coordinates input for the area keyword

c.retrieve(
'cems-glofas-reforecast',
{
'system_version': 'version_2_2',
'variable': 'river_discharge_in_the_last_24_hours',
'format': 'grib',
'hydrological_model': 'htessel_lisflood',
'product_type': ['control_reforecast','ensemble_perturbed_reforecasts'],
'area':station_point_coord, 
'hyear': YEAR,
'hmonth': month ,
'hday': day ,
'leadtime_hour': LEADTIMES,
},
f'glofas_reforecast_{station}_{month}_{day}.grib')

Area cropping:


...

EFAS


Warning
titleCoordinates precision

When transforming from lat/lon (source coordinates) to projected LAEA (target coordinates), you need to consider that the number of decimal places of the source coordinates affects the target coordinates precision:

An interval of 0.001 degrees corresponds to about 100 metres in LAEA.

An interval of 0.00001 degrees corresponds to about 1 metre in LAEA.

Remote processing

to update once cropping works....

Time series extraction:

Code Block
languagepy
titleTBD

Area cropping:

Code Block
languagepy
titleTBD

Local processing

Time series extraction:

Info
titleImportant - Download upstream area

EFAS x and y coordinates, when converted from GRIB to NetCDF, are not projected coordinates but matrix indexes (i, j), It is necessary to download the upstream area static file that contains the projected coordinates and replace it in EFAS.


Code Block
languagepy
titleScript
collapsetrue
import xarray as xr
import pandas as pd
from pyproj import Transformer,CRS

parameter = "dis06"

ds = xr.open_dataset("efas_reforecast.grib", engine="cfgrib")
df = pd.read_csv("GRDC.csv")
uparea = xr.open_dataset("ec_uparea4.0.nc") # the upstream area

# replace x, y
ds["x"] = uparea["x"]
ds["y"] = uparea["y"]

# define reprojection parameters
laea_proj = CRS.from_proj4("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs")
transformer = Transformer.from_crs('epsg:4326', laea_proj, always_xy=True)

total = len(df)

rows = []
count = 0
for lon, lat, id in zip(df.long, df.lat, df.grdc_no):
    x1, y1 = transformer.transform(lon, lat)
    extracted = ds.sel(x=x1, y=y1, number = 1, method="nearest")[parameter]
    df_temp = extracted.drop_vars(["surface", "number"]).to_dataframe().reset_index()
    df_temp["grdc"] = str(id)
    df_temp = df_temp.drop("step", axis=1)
    df_temp = df_temp.set_index(["grdc","time"])
    rows.append(df_temp)
    count += 1
    print(f"progress: {count/total*100} %")

out = pd.concat(rows)
out.to_csv("extracted.csv", index="grdc")

Area cropping:

Code Block
languagepy
titleScript
collapsetrue
import xarray as xr
from pyproj import Transformer, CRS
import numpy as np

# Rhine's basin bounding box
bbox = [50.972204, 46.296530, 5.450796, 11.871059]  # N,S,W,E

ds = xr.open_dataset("efas_reforecast.grib", engine="cfgrib")
uparea = xr.open_dataset("ec_uparea4.0.nc")

# replace x, y
ds["x"] = uparea["x"]
ds["y"] = uparea["y"]

# define reprojection parameters
laea_proj = CRS.from_proj4(
    "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs"
)
transformer = Transformer.from_crs("epsg:4326", laea_proj, always_xy=True)

we = bbox[2:]
ns = bbox[:2]

we_xy, ns_xy = transformer.transform(we, ns)

we_xy = [np.floor(we_xy[0]), np.ceil(we_xy[1])]
ns_xy = [np.ceil(ns_xy[0]), np.floor(ns_xy[1])]

ds_cropped = ds.sel(
    x=slice(we_xy[0], we_xy[1]), y=slice(ns_xy[0], ns_xy[1])
)

ds_cropped.to_netcdf("efas_forecast_cropped.nc")


...

GloFAS

Remote processing

Time series extraction:

Code Block
languagepy
titleExtract timeseries: GloFAS Medium-range Reforecast
collapsetrue
import cdsapi
from datetime import datetime, timedelta



def get_monthsdays(start =[2019,1,1],end=[2019,12,31]):
# reforecast time index
start, end = datetime(*start),datetime(*end)
days = [start + timedelta(days=i) for i in range((end - start).days + 1)]
monthday = [d.strftime("%B-%d").split("-") for d in days if d.weekday() in [0,3] ] 

return monthday



if __name__ == '__main__':
c = cdsapi.Client()


# station coordinates (lat,lon)
COORDS = {
"Thames":[51.35,-0.45]
}

# select date index corresponding to the event 
MONTHSDAYS = get_monthsdays(start =[2019,7,11],end=[2019,7,11])

YEAR = '2007' 

LEADTIMES = ['%d'%(l) for l in range(24,1128,24)]

# loop over date index (just 1 in this case)
for md in MONTHSDAYS:

month = md[0].lower()
day = md[1]

# loop over station coordinates
for station in COORDS:

station_point_coord = COORDS[station]*2 # coordinates input for the area keyword

c.retrieve(
'cems-glofas-reforecast',
{
'system_version': 'version_2_2',
'variable': 'river_discharge_in_the_last_24_hours',
'format': 'grib',
'hydrological_model': 'htessel_lisflood',
'product_type': ['control_reforecast','ensemble_perturbed_reforecasts'],
'area':station_point_coord, 
'hyear': YEAR,
'hmonth': month ,
'hday': day ,
'leadtime_hour': LEADTIMES,
},
f'glofas_reforecast_{station}
Code Block
languagepy
titleArea cropping: GloFAS Medium-range reforecast 
collapsetrue
## === retrieve GloFAS Medium-Range Reforecast ===

## === subset India, Pakistan, Nepal and Bangladesh region === 


import cdsapi
from datetime import datetime, timedelta


def get_monthsdays():

    start, end = datetime(2019, 1, 1), datetime(2019, 12, 31)
    days = [start + timedelta(days=i) for i in range((end - start).days + 1)]
    monthday = [d.strftime("%B-%d").split("-")  for d in days if d.weekday() in [0,3] ]   

    return monthday

MONTHSDAYS = get_monthsdays()

if __name__ == '__main__':
    c = cdsapi.Client()
    
    # user inputs
	BBOX = [40.05 ,59.95, 4.95, 95.05] # North West South East
    YEARS  = ['%d'%(y) for y in range(1999,2019)]
    LEADTIMES = ['%d'%(l) for l in range(24,1128,24)]
    
    # submit request
    for md in MONTHSDAYS:

        month = md[0].lower()
        day = md[1]

        c.retrieve(
            'cems-glofas-reforecast',
            {
                'system_version': 'version_2_2',
                'variable': 'river_discharge_in_the_last_24_hours',
                'format': 'grib',
                'hydrological_model': 'htessel_lisflood',
                'product_type': 'control_reforecast',
				'area': BBOX,# < - subset
                'hyear': YEARS,
                'hmonth': month ,
                'hday': day ,
                'leadtime_hour': LEADTIMES,
            },
            f'glofas_reforecast_{month}_{day}.grib')

Area cropping:

Code Block
languagepy
titleArea cropping: GloFAS Seasonal ForecastMedium-range reforecast 
collapsetrue
## === retrieve GloFAS SeasonalMedium-Range ForecastReforecast ===

## === subset South America/AmazonIndia, Pakistan, Nepal and Bangladesh region === 


import cdsapi
from datetime import datetime, timedelta


ifdef __name__ == '__main__':
get_monthsdays():

    cstart, end = cdsapi.Client()

    YEARS  = ['%d'%(ydatetime(2019, 1, 1), datetime(2019, 12, 31)
    days = [start + timedelta(days=i) for yi in range(2020,2022(end - start).days + 1)]


    MONTHSmonthday = ['%02d'%(m) for m in range(1,13)]

    LEADTIMES = ['%d'%(l) for l in range(24,2976,24)]
    d.strftime("%B-%d").split("-")  for d in days if d.weekday() in [0,3] ]   

    return monthday

MONTHSDAYS = get_monthsdays()

if __name__ == '__main__':
    forc year in YEARS:

= cdsapi.Client()
    
    for# month in MONTHS:
            user inputs
	BBOX = [40.05 ,59.95, 4.95, 95.05] # North West South East
    YEARS  = ['%d'%(y) for y in  c.retrieve(range(1999,2019)]
    LEADTIMES = ['%d'%(l) for l        'cems-glofas-seasonal',in range(24,1128,24)]
    
    # submit request
    for md in MONTHSDAYS:

   {   
  month = md[0].lower()
        day = md[1]

  'variable': 'river_discharge_in_the_last_24_hours',
     c.retrieve(
           'format': 'grib',cems-glofas-reforecast',
            {
                'yearsystem_version': year'version_2_2',
                'monthvariable': '12' if year == '2020' else monthriver_discharge_in_the_last_24_hours',
                'format': 'grib',
                'leadtime_hour': LEADTIMEShydrological_model': 'htessel_lisflood',
                'product_type': 'control_reforecast',
				'area': [ 10.95BBOX,# -90.95,< -30.95, -29.95 ]

 subset
                'hyear': }YEARS,
                f'glofas_seasonal_{year}_{month}.grib')
Code Block
languagepy
titleArea cropping: GloFAS Seasonal Reforecast
collapsetrue
## === retrieve GloFAS Seasonal Reforecast ===

## === subset South America/Amazon region === 

import cdsapi

if __name__ == '__main__':

'hmonth': month ,
                'hday': day ,
    c = cdsapi.Client()

    YEARS  = ['%d'%(y) for y in range(1981,2021)]

 'leadtime_hour': LEADTIMES,
      MONTHS = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']

 },
           LEADTIMES f'glofas_reforecast_{month}_{day}.grib')


Code Block
languagepy
titleArea cropping: GloFAS Seasonal Forecast
collapsetrue
## === retrieve GloFAS Seasonal Forecast ===

## === subset South America/Amazon region === 

import cdsapi


if __name__ == '__main__':
    c = cdsapi.Client()= ['%d'%(l) for l in range(24,2976,24)]
    
    for year in YEARS:
        for month in MONTHS:

    YEARS        c.retrieve(
= ['%d'%(y) for y in range(2020,2022)]


    MONTHS = ['%02d'%(m) for m in range(1,13)]

    LEADTIMES = 'cems-glofas-seasonal-reforecast',
    ['%d'%(l) for l in range(24,2976,24)]
    
    for year in  {
YEARS:

        for month in MONTHS:
           'system_version': 'version_2_2',
            c.retrieve(
        'variable':'river_discharge_in_the_last_24_hours        'cems-glofas-seasonal',
                 {   'format':'grib',
                    'hydrological_model':'htessel_lisflood'variable': 'river_discharge_in_the_last_24_hours',
                    'hyearformat': year'grib',
                    'hmonthyear': monthyear,
                'month': '12' if year == 'leadtime_hour': LEADTIMES2020' else month,
                    'leadtime_hour': LEADTIMES,
				'area': [ 10.95, -90.95, -30.95, -29.95 ]

                }, 
                f'glofas_seasonal_reforecast_{{year}_{month}.grib')

Local processing

...


Code Block
languagepy
titleScriptArea cropping: GloFAS Seasonal Reforecast
collapsetrue
import## xarray=== asretrieve xr
importGloFAS pandasSeasonal asReforecast pd

parameter = "dis24"

ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib",backend_kwargs={'time_dims':['time']})
df = pd.read_csv("GRDC.csv")

total = len(df)

rows = []
count = 0
for lon, lat, id in zip(df.long, df.lat, df.grdc_no):
    extracted = ds.sel(longitude=lon, latitude=lat, method="nearest")[parameter]
    df_temp = extracted.drop_vars(["surface"]).to_dataframe().reset_index()
    df_temp["grdc"] = str(id)
    df_temp = df_temp.set_index(["grdc", "time"])
    rows.append(df_temp)
    count += 1
    print(f"progress: {count/total*100} %")

out = pd.concat(rows)
out.to_csv("extracted.csv", index="grdc")

Area cropping:

Code Block
languagepy
titleScript
collapsetrue
import xarray as xr

# Rhine's basin bounding box
bbox = [50.972204, 46.296530, 5.450796, 11.871059]  # N,S,W,E

ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib")

ds_cropped = ds.sel(
    longitude=slice(bbox[2], bbox[3]), latitude=slice(bbox[0], bbox[1])
)

ds_cropped.to_netcdf("glofas_historical_cropped.nc")

EFAS

Warning
titleCoordinates precision

When transforming from lat/lon (source coordinates) to projected LAEA (target coordinates), you need to consider that the number of decimal places of the source coordinates affects the target coordinates precision:

An interval of 0.001 degrees corresponds to about 100 metres in LAEA.

An interval of 0.00001 degrees corresponds to about 1 metre in LAEA.

Remote processing

to update once cropping works....

Time series extraction:

...

languagepy
titleTBD
===

## === subset South America/Amazon region === 

import cdsapi

if __name__ == '__main__':


    c = cdsapi.Client()

    YEARS  = ['%d'%(y) for y in range(1981,2021)]

    MONTHS = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']

    LEADTIMES = ['%d'%(l) for l in range(24,2976,24)]
    
    for year in YEARS:
        for month in MONTHS:

            c.retrieve(
                'cems-glofas-seasonal-reforecast',
                {
                    'system_version': 'version_2_2',
                    'variable':'river_discharge_in_the_last_24_hours',
                    'format':'grib',
                    'hydrological_model':'htessel_lisflood',
                    'hyear': year,
                    'hmonth': month,
                    'leadtime_hour': LEADTIMES,
                    'area': [ 10.95, -90.95, -30.95, -29.95 ]
                }, 
                f'glofas_seasonal_reforecast_{year}_{month}.grib')


Local processing

Time series extraction:

Code Block
languagepy
titleScript
collapsetrue
import xarray as xr
import pandas as pd

parameter = "dis24"

ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib",backend_kwargs={'time_dims':['time']})
df = pd.read_csv("GRDC.csv")

total = len(df)

rows = []
count = 0
for lon, lat, id in zip(df.long, df.lat, df.grdc_no):
    extracted = ds.sel(longitude=lon, latitude=lat, method="nearest")[parameter]
    df_temp = extracted.drop_vars(["surface"]).to_dataframe().reset_index()
    df_temp["grdc"] = str(id)
    df_temp = df_temp.set_index(["grdc", "time"])
    rows.append(df_temp)
    count += 1
    print(f"progress: {count/total*100} %")

out = pd.concat(rows)
out.to_csv("extracted.csv", index="grdc")

Area cropping:

Code Block
languagepy
titleScript
collapsetrue
import xarray as xr

# Rhine's basin bounding box
bbox = [50.972204, 46.296530, 5.450796, 11.871059]  # N,S,W,E

ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib")

Area cropping:

...

languagepy
titleTBD

Local processing

Time series extraction:

Info
titleImportant - Download upstream area

EFAS x and y coordinates, when converted from GRIB to NetCDF, are not projected coordinates but matrix indexes (i, j), It is necessary to download the upstream area static file that contains the projected coordinates and replace it in EFAS.

Code Block
languagepy
titleScript
collapsetrue
import xarray as xr
import pandas as pd
from pyproj import Transformer,CRS

parameter = "dis06"

ds = xr.open_dataset("efas_reforecast.grib", engine="cfgrib")
df = pd.read_csv("GRDC.csv")
uparea = xr.open_dataset("ec_uparea4.0.nc") # the upstream area

# replace x, y
ds["x"] = uparea["x"]
ds["y"] = uparea["y"]

# define reprojection parameters
laea_proj = CRS.from_proj4("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs")
transformer = Transformer.from_crs('epsg:4326', laea_proj, always_xy=True)

total = len(df)

rows = []
count = 0
for lon, lat, id in zip(df.long, df.lat, df.grdc_no):
    x1, y1 = transformer.transform(lon, lat)
    extracted = ds.sel(x=x1, y=y1, number = 1, method="nearest")[parameter]
    df_temp = extracted.drop_vars(["surface", "number"]).to_dataframe().reset_index()
    df_temp["grdc"] = str(id)
    df_temp = df_temp.drop("step", axis=1)
    df_temp = df_temp.set_index(["grdc","time"])
    rows.append(df_temp)
    count += 1
    print(f"progress: {count/total*100} %")

out = pd.concat(rows)
out.to_csv("extracted.csv", index="grdc")

Area cropping:

Code Block
languagepy
titleScript
collapsetrue
import xarray as xr
from pyproj import Transformer, CRS
import numpy as np

# Rhine's basin bounding box
bbox = [50.972204, 46.296530, 5.450796, 11.871059]  # N,S,W,E

ds = xr.open_dataset("efas_reforecast.grib", engine="cfgrib")
uparea = xr.open_dataset("ec_uparea4.0.nc")

# replace x, y
ds["x"] = uparea["x"]
ds["y"] = uparea["y"]

# define reprojection parameters
laea_proj = CRS.from_proj4(
    "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs"
)
transformer = Transformer.from_crs("epsg:4326", laea_proj, always_xy=True)

we = bbox[2:]
ns = bbox[:2]

we_xy, ns_xy = transformer.transform(we, ns)

we_xy = [np.floor(we_xy[0]), np.ceil(we_xy[1])]
ns_xy = [np.ceil(ns_xy[0]), np.floor(ns_xy[1])]

ds_cropped = ds.sel(
    xlongitude=slice(we_xybbox[02], we_xybbox[13]), ylatitude=slice(ns_xybbox[0], ns_xybbox[1])
)

ds_cropped.to_netcdf("efasglofas_forecasthistorical_cropped.nc")

...