page under construction ------------------------------
There are many situations where a user is only interested in a subset of the dataset spatial domain.
For example, when comparing modelled river flow against observations, it is reasonable to being able to extract the timeseries at those point coordinates rather than dealing with many GB of data.
Similarly, when the focus is on a specific catchment it is likely that you want only that part of the spatial domain.
In summary, there two operations that are very popular on CEMS-Flood datasets:
There are different ways to perform those operations:
import cdsapi from datetime import datetime, timedelta def get_monthsdays(start =[2019,1,1],end=[2019,12,31]): # reforecast time index start, end = datetime(*start),datetime(*end) days = [start + timedelta(days=i) for i in range((end - start).days + 1)] monthday = [d.strftime("%B-%d").split("-") for d in days if d.weekday() in [0,3] ] return monthday if __name__ == '__main__': c = cdsapi.Client() # station coordinates (lat,lon) COORDS = { "Thames":[51.35,-0.45] } # select date index corresponding to the event MONTHSDAYS = get_monthsdays(start =[2019,7,11],end=[2019,7,11]) YEAR = '2007' LEADTIMES = ['%d'%(l) for l in range(24,1128,24)] # loop over date index (just 1 in this case) for md in MONTHSDAYS: month = md[0].lower() day = md[1] # loop over station coordinates for station in COORDS: station_point_coord = COORDS[station]*2 # coordinates input for the area keyword c.retrieve( 'cems-glofas-reforecast', { 'system_version': 'version_2_2', 'variable': 'river_discharge_in_the_last_24_hours', 'format': 'grib', 'hydrological_model': 'htessel_lisflood', 'product_type': ['control_reforecast','ensemble_perturbed_reforecasts'], 'area':station_point_coord, 'hyear': YEAR, 'hmonth': month , 'hday': day , 'leadtime_hour': LEADTIMES, }, f'glofas_reforecast_{station}_{month}_{day}.grib') |
## === retrieve GloFAS Medium-Range Reforecast === ## === subset India, Pakistan, Nepal and Bangladesh region === import cdsapi from datetime import datetime, timedelta def get_monthsdays(): start, end = datetime(2019, 1, 1), datetime(2019, 12, 31) days = [start + timedelta(days=i) for i in range((end - start).days + 1)] monthday = [d.strftime("%B-%d").split("-") for d in days if d.weekday() in [0,3] ] return monthday MONTHSDAYS = get_monthsdays() if __name__ == '__main__': c = cdsapi.Client() # user inputs BBOX = [40.05 ,59.95, 4.95, 95.05] # North West South East YEARS = ['%d'%(y) for y in range(1999,2019)] LEADTIMES = ['%d'%(l) for l in range(24,1128,24)] # submit request for md in MONTHSDAYS: month = md[0].lower() day = md[1] c.retrieve( 'cems-glofas-reforecast', { 'system_version': 'version_2_2', 'variable': 'river_discharge_in_the_last_24_hours', 'format': 'grib', 'hydrological_model': 'htessel_lisflood', 'product_type': 'control_reforecast', 'area': BBOX,# < - subset 'hyear': YEARS, 'hmonth': month , 'hday': day , 'leadtime_hour': LEADTIMES, }, f'glofas_reforecast_{month}_{day}.grib') |
## === retrieve GloFAS Seasonal Forecast === ## === subset South America/Amazon region === import cdsapi if __name__ == '__main__': c = cdsapi.Client() YEARS = ['%d'%(y) for y in range(2020,2022)] MONTHS = ['%02d'%(m) for m in range(1,13)] LEADTIMES = ['%d'%(l) for l in range(24,2976,24)] for year in YEARS: for month in MONTHS: c.retrieve( 'cems-glofas-seasonal', { 'variable': 'river_discharge_in_the_last_24_hours', 'format': 'grib', 'year': year, 'month': '12' if year == '2020' else month, 'leadtime_hour': LEADTIMES, 'area': [ 10.95, -90.95, -30.95, -29.95 ] }, f'glofas_seasonal_{year}_{month}.grib') |
## === retrieve GloFAS Seasonal Reforecast === ## === subset South America/Amazon region === import cdsapi if __name__ == '__main__': c = cdsapi.Client() YEARS = ['%d'%(y) for y in range(1981,2021)] MONTHS = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december'] LEADTIMES = ['%d'%(l) for l in range(24,2976,24)] for year in YEARS: for month in MONTHS: c.retrieve( 'cems-glofas-seasonal-reforecast', { 'system_version': 'version_2_2', 'variable':'river_discharge_in_the_last_24_hours', 'format':'grib', 'hydrological_model':'htessel_lisflood', 'hyear': year, 'hmonth': month, 'leadtime_hour': LEADTIMES, 'area': [ 10.95, -90.95, -30.95, -29.95 ] }, f'glofas_seasonal_reforecast_{year}_{month}.grib') |
import cdsapi c = cdsapi.Client() c.retrieve( 'cems-glofas-historical', { 'variable': 'river_discharge_in_the_last_24_hours', 'format': 'grib', 'hydrological_model': 'lisflood', 'product_type': 'intermediate', 'hyear': '2021', 'hmonth': 'january', 'hday': [ '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', ], 'system_version': 'version_3_1', }, 'glofas_historical.grib') |
import xarray as xr import pandas as pd parameter = "dis24" ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib",backend_kwargs={'time_dims':['time']}) df = pd.read_csv("GRDC.csv") total = len(df) rows = [] count = 0 for lon, lat, id in zip(df.long, df.lat, df.grdc_no): extracted = ds.sel(longitude=lon, latitude=lat, method="nearest")[parameter] df_temp = extracted.drop_vars(["surface"]).to_dataframe().reset_index() df_temp["grdc"] = str(id) df_temp = df_temp.set_index(["grdc", "time"]) rows.append(df_temp) count += 1 print(f"progress: {count/total*100} %") out = pd.concat(rows) out.to_csv("extracted.csv", index="grdc") |
import xarray as xr # Rhine's basin bounding box bbox = [50.972204, 46.296530, 5.450796, 11.871059] # N,S,W,E ds = xr.open_dataset("glofas_historical.grib", engine="cfgrib") ds_cropped = ds.sel( longitude=slice(bbox[2], bbox[3]), latitude=slice(bbox[0], bbox[1]) ) ds_cropped.to_netcdf("glofas_historical_cropped.nc") |
When transforming from lat/lon (source coordinates) to projected LAEA (target coordinates), you need to consider that the number of decimal places of the source coordinates affects the target coordinates precision: An interval of 0.001 degrees corresponds to about 100 metres in LAEA. An interval of 0.00001 degrees corresponds to about 1 metre in LAEA. |
to update once cropping works....
import cdsapi c = cdsapi.Client() c.retrieve( 'efas-reforecast', { 'format': 'grib', 'product_type': 'ensemble_perturbed_reforecasts', 'variable': 'river_discharge_in_the_last_6_hours', 'model_levels': 'surface_level', 'hyear': '2007', 'hmonth': 'march', 'hday': [ '04', '07', ], 'leadtime_hour': [ '0', '12', '18', '6', ], }, 'efas_reforecast.grib') |
We are going to extract EFAS reforecast's timeseries at locations defined by latitude and longitude coordinates from a tiny subset of the GRDC dataset.
EFAS's x and y coordinates, when converted from GRIB to NetCDF, are not projected coordinates but matrix indexes (i, j), It is necessary to download the upstream area static file that contains the projected coordinates and replace it in EFAS. |
Copy the content into an empty file named "GRDC.csv", the file should reside in the same folder of the efas_reforecast.grib file and the upstream area.
grdc_no,wmo_reg,sub_reg,river,station,country,lat,long,area,altitude 6118010,6,18,CLAIE,SAINT-JEAN-BREVELAY,FR,47.824831,-2.703308,137.0,99.98 6118015,6,18,YVEL,LOYAT (PONT D129),FR,47.993815,-2.368849,315.0,88.26 6118020,6,18,"ARON, RUISSEAU D'",GRAND-FOUGERAY (LA BERNADAISE),FR,47.71222,-1.690835,118.0,68.0 6118025,6,18,"CANUT, RUISSEAU DE",SAINT-JUST (LA RIVIERE COLOMBEL),FR,47.775309,-1.979609,37.0,80.22 6118030,6,18,AFF,PAIMPONT (PONT DU SECRET),FR,47.981631,-2.143762,30.2,119.01 6118050,6,18,COET-ORGAN,QUISTINIC (KERDEC),FR,47.904164,-3.201265,47.7,94.42 6118060,6,18,EVEL,GUENIN,FR,47.899928,-2.975167,316.0,95.16 6118070,6,18,STER-GOZ,BANNALEC (PONT MEYA),FR,47.906833,-3.752172,69.7,85.08 6118080,6,18,MOROS,CONCARNEAU (PONT D22),FR,47.882934,-3.875375,20 |
import xarray as xr import pandas as pd from pyproj import Transformer,CRS parameter = "dis06" ds = xr.open_dataset("efas_reforecast.grib", engine="cfgrib") df = pd.read_csv("GRDC.csv") uparea = xr.open_dataset("ec_uparea4.0.nc") # the upstream area # replace x, y ds["x"] = uparea["x"] ds["y"] = uparea["y"] # define reprojection parameters laea_proj = CRS.from_proj4("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs") transformer = Transformer.from_crs('epsg:4326', laea_proj, always_xy=True) total = len(df) rows = [] count = 0 for lon, lat, id in zip(df.long, df.lat, df.grdc_no): x1, y1 = transformer.transform(lon, lat) extracted = ds.sel(x=x1, y=y1, number = 1, method="nearest")[parameter] df_temp = extracted.drop_vars(["surface", "number"]).to_dataframe().reset_index() df_temp["grdc"] = str(id) df_temp = df_temp.drop("step", axis=1) df_temp = df_temp.set_index(["grdc","time"]) rows.append(df_temp) count += 1 print(f"progress: {count/total*100} %") out = pd.concat(rows) out.to_csv("extracted.csv", index="grdc") |
grdc,time,y,x,latitude,longitude,valid_time,dis06 6118010,2007-03-04,2827500.0,3372500.0,47.82327782578349,-2.7316459165737292,2007-03-04 00:00:00,9.68457 6118010,2007-03-04,2827500.0,3372500.0,47.82327782578349,-2.7316459165737292,2007-03-04 06:00:00,9.5390625 6118010,2007-03-04,2827500.0,3372500.0,47.82327782578349,-2.7316459165737292,2007-03-04 12:00:00,9.783691 6118010,2007-03-04,2827500.0,3372500.0,47.82327782578349,-2.7316459165737292,2007-03-04 18:00:00,9.83252 6118010,2007-03-07,2827500.0,3372500.0,47.82327782578349,-2.7316459165737292,2007-03-07 00:00:00,11.904785 6118010,2007-03-07,2827500.0,3372500.0,47.82327782578349,-2.7316459165737292,2007-03-07 06:00:00,12.73584 6118010,2007-03-07,2827500.0,3372500.0,47.82327782578349,-2.7316459165737292,2007-03-07 12:00:00,12.832031 6118010,2007-03-07,2827500.0,3372500.0,47.82327782578349,-2.7316459165737292,2007-03-07 18:00:00,13.336914 6118015,2007-03-04,2842500.0,3402500.0,48.001715448932764,-2.3682021184138047,2007-03-04 00:00:00,11.508301 6118015,2007-03-04,2842500.0,3402500.0,48.001715448932764,-2.3682021184138047,2007-03-04 06:00:00,11.17334 6118015,2007-03-04,2842500.0,3402500.0,48.001715448932764,-2.3682021184138047,2007-03-04 12:00:00,11.09082 6118015,2007-03-04,2842500.0,3402500.0,48.001715448932764,-2.3682021184138047,2007-03-04 18:00:00,11.20752 6118015,2007-03-07,2842500.0,3402500.0,48.001715448932764,-2.3682021184138047,2007-03-07 00:00:00,14.920898 6118015,2007-03-07,2842500.0,3402500.0,48.001715448932764,-2.3682021184138047,2007-03-07 06:00:00,15.956055 6118015,2007-03-07,2842500.0,3402500.0,48.001715448932764,-2.3682021184138047,2007-03-07 12:00:00,16.127441 6118015,2007-03-07,2842500.0,3402500.0,48.001715448932764,-2.3682021184138047,2007-03-07 18:00:00,15.900879 6118020,2007-03-04,2802500.0,3447500.0,47.71290346585623,-1.6892419697226784,2007-03-04 00:00:00,13.782227 |
import xarray as xr from pyproj import Transformer, CRS import numpy as np # Rhine's basin bounding box bbox = [50.972204, 46.296530, 5.450796, 11.871059] # N,S,W,E ds = xr.open_dataset("efas_reforecast.grib", engine="cfgrib") uparea = xr.open_dataset("ec_uparea4.0.nc") # replace x, y ds["x"] = uparea["x"] ds["y"] = uparea["y"] # define reprojection parameters laea_proj = CRS.from_proj4( "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs" ) transformer = Transformer.from_crs("epsg:4326", laea_proj, always_xy=True) we = bbox[2:] ns = bbox[:2] we_xy, ns_xy = transformer.transform(we, ns) we_xy = [np.floor(we_xy[0]), np.ceil(we_xy[1])] ns_xy = [np.ceil(ns_xy[0]), np.floor(ns_xy[1])] ds_cropped = ds.sel( x=slice(we_xy[0], we_xy[1]), y=slice(ns_xy[0], ns_xy[1]) ) ds_cropped.to_netcdf("efas_forecast_cropped.nc") |