ARCO ERA5-Land de-accumulationIn this notebook, we show the procedure to de-accumulate the data of an accumulated ERA5-Land variable, then show how to accumulate it back and later prove the procedure by reconstructing the data and comparing it against the original one. Code Block |
---|
| from copy import deepcopy
import xarray as xr
import numpy as np
import cdsapi |
Definitions Code Block |
---|
| def flatten_dataset(ds, var, time_selection=[]):
"""Rearange the data so we flatten time samples.
E.g. from
{'time': 2, 'step': 24, 'latitude': 1801, 'longitude': 3600}
to
{'time': 48, 'latitude': 1801, 'longitude': 3600}
"""
total_samples = ds.valid_time.shape[0]*ds.valid_time.shape[1]
reshaped_var = ds[var].values.reshape((total_samples, ds[var].shape[2], ds[var].shape[3]))
ds_new = xr.Dataset(
data_vars={var: (["time", "latitude", "longitude"], reshaped_var)},
coords={
"time": ("time", ds.valid_time.values.reshape((total_samples))),
"latitude": ("latitude", ds.latitude.values),
"longitude": ("longitude", ds.longitude.values),
},
)
ds_new.attrs = ds.attrs
ds_new[var].attrs = ds[var].attrs
ds_new.longitude.attrs = ds.longitude.attrs
ds_new.latitude.attrs = ds.latitude.attrs
ds_new.time.attrs = ds.valid_time.attrs
if time_selection is not None:
ds_new = ds_new.isel(time=slice(*time_selection))
# Remove additional variables, same as in ARCO hourly ERA5 on single levels
for drop_var in ["number", "surface"]:
if drop_var in ds_new:
ds_new = ds_new.drop_vars([drop_var])
return ds_new
def apply_lon_lat_conventions(ds):
# Renames
if "lon" in ds.dims:
ds = ds.rename({"lon": "longitude"})
if "lat" in ds.dims:
ds = ds.rename({"lat": "latitude"})
# Flip latitudes (ensure they are monotonic increasing)
if "latitude" in ds.dims:
lats = ds["latitude"]
if len(lats) > 1 and lats[0] > lats[-1]:
ds = ds.reindex(latitude=ds.latitude[::-1])
# Convert longitude to [-180, 180[
if "longitude" in ds.dims and ds["longitude"].max() > 180:
lons = ds["longitude"]
lons_attrs = lons.attrs
new_lons = np.concatenate([lons[lons >= 180], lons[lons < 180]])
ds = ds.reindex(longitude=new_lons)
ds = ds.assign_coords(longitude=(((ds["longitude"] + 180) % 360) - 180))
ds["longitude"].attrs = lons_attrs
return ds |
Data Code Block |
---|
| variable = "tp"
# CDS API
token = "" # Your CDS API Token
dataset = "reanalysis-era5-land"
request = {
"variable": ["total_precipitation"],
"year": "1970",
"month": "01",
"day": ["01"],
"time": [
"00:00", "01:00", "02:00",
"03:00", "04:00", "05:00",
"06:00", "07:00", "08:00",
"09:00", "10:00", "11:00",
"12:00", "13:00", "14:00",
"15:00", "16:00", "17:00",
"18:00", "19:00", "20:00",
"21:00", "22:00", "23:00"
],
"data_format": "grib",
"download_format": "unarchived"
}
client = cdsapi.Client(url="https://cds.climate.copernicus.eu/api", key=token)
client.retrieve(dataset, request).download("cdsapi_era5-land-date-1.grib")
request["day"] = ["02"]
client.retrieve(dataset, request).download("cdsapi_era5-land-date0.grib")
ds0 = xr.open_dataset("cdsapi_era5-land-date0.grib", filter_by_keys={'stepType': 'accum'})
ds_1 = xr.open_dataset("cdsapi_era5-land-date-1.grib", filter_by_keys={'stepType': 'accum'})
ds0 = apply_lon_lat_conventions(ds0)
ds_1 = apply_lon_lat_conventions(ds_1) |
De-accumulation Code Block |
---|
| # Deaccumulate
# We want to finally obtain a Dataset that contains 24 samples, them being:
# [
# 00:00 <date-1, step24> - <date-1, step23>,
# 01:00 <date0, step1>,
# 02:00 <date0, step2> - <date0, step1>,
# 03:00 <date0, step3> - <date0, step2>,
# ...
# 23:00 <date0, step23> - <date0, step22>
# ]
def deaccumulate(ds_1, ds0, variable):
""" De-accumulate the variable of one Dataset
Args:
ds_1: Date -1 (e.g. 1970-01-01) dataset.
ds0: Date 0 (e.g. 1970-01-02) dataset.
variable: The dataset variable
Returns:
A de-accumulated Dataset.
"""
ds_1_flat = flatten_dataset(ds_1, variable, None)
ds0_flat = flatten_dataset(ds0, variable, None)
ds_deaccumulated = deepcopy(ds0_flat)
# Array indexes
# ds-1, date-1, step23
i_ds_1_d_1_s23 = 46
# ds0, date-1, step24
i_ds0_d_1_s24 = 23
# ds0, date0, step1
i_ds0_d0_s1 = 24
# 00:00 <date-1, step24> - <date-1, step23>
ds_deaccumulated[variable][i_ds0_d_1_s24] -= ds_1_flat[variable][i_ds_1_d_1_s23]
# Nothing to do for 01:00 <date0, step1>,
# From
# 02:00 <date0, step2> - <date0, step1>,
# to
# 23:00 <date0, step23> - <date0, step22>
for i, t in enumerate(range(ds_deaccumulated.time.shape[0])):
if i > i_ds0_d0_s1:
ds_deaccumulated[variable][t] -= ds0_flat[variable][t - 1]
# Slice to the times of interest (24 samples)
ds_deaccumulated = ds_deaccumulated.isel(time=slice(i_ds0_d_1_s24, i_ds_1_d_1_s23 + 1))
return ds_deaccumulated
ds_deaccumulated = deaccumulate(ds_1, ds0, variable) |
Accumulation Code Block |
---|
| # We revert de de-accumulation to obtain the original values
def accumulate(ds0_deaccumulated, variable, ds_1_accumulated=None, ds_1_deaccumulated=None):
"""Accumulate a variable over each time sample.
Args:
ds0_deaccumulated: Date 0 (e.g. 1970-01-02) de-accumulated
variable: Dataset variable
ds_1_accumulated: Date -1 (e.g. 1970-01-01) accumulated. Expected time and steps dimensions.
Needed either this or ds_1_deaccumulated
ds_1_deaccumulated: Date -1 (e.g. 1970-01-01) de-accumulated. Expected time dimension.
Needed either this or ds_1_accumulated
Returns:
Date 0 xarray.Dataset
"""
ds0_accumulated = deepcopy(ds0_deaccumulated)
if ds_1_accumulated:
ds0_accumulated[variable][0] += ds_1_accumulated[variable].values[1][22]
elif ds_1_deaccumulated:
for i in range(1, 24):
ds0_accumulated[variable][0] += ds_1_deaccumulated[variable].values[i]
for step in range(2, 24):
tmp = deepcopy(ds0_deaccumulated[variable].values[1])
for acc_step in range(2, step+1):
tmp += ds0_deaccumulated[variable].values[acc_step]
ds0_accumulated[variable][step] = tmp
return ds0_accumulated
ds_accumulated = accumulate(ds_deaccumulated, variable, ds_1_accumulated=ds_1) |
Check correctness Code Block |
---|
| # 00:00
np.allclose(ds_accumulated.tp.values[0], ds0.tp.values[0][23], equal_nan=True)
# From
# 01:00
# to
# 23:00
for step in range(1, 24):
print("Time", f"{step}:00")
print(np.allclose(ds_accumulated.tp[step], ds0.tp.values[1][step-1], equal_nan=True))
print() |
|