AWS Volumes to ARCO#

This example shows how to create a volume scan from multiple sweep files stored on AWS. The volume scan structure is based on tree-like hierarchical collection of xarray objects

Imports#

[1]:
import warnings
from datetime import datetime

import cartopy.crs as ccrs
import cmweather  # noqa
import fsspec
import matplotlib.pyplot as plt
import xarray as xr
from IPython.display import HTML
from matplotlib.animation import FuncAnimation

import xradar as xd

warnings.simplefilter("ignore")

Access radar data from the Colombian radar network on AWS#

Access data from the IDEAM bucket on AWS. Detailed information can be found here

[2]:
def create_query(date, radar_site):
    """
    Creates a string for quering the IDEAM radar files stored in AWS bucket
    :param date: date to be queried. e.g datetime(2021, 10, 3, 12). Datetime python object
    :param radar_site: radar site e.g. Guaviare
    :return: string with a IDEAM radar bucket format
    """
    if (date.hour != 0) and (date.hour != 0):
        return f"l2_data/{date:%Y}/{date:%m}/{date:%d}/{radar_site}/{radar_site[:3].upper()}{date:%y%m%d%H}"
    elif (date.hour != 0) and (date.hour == 0):
        return f"l2_data/{date:%Y}/{date:%m}/{date:%d}/{radar_site}/{radar_site[:3].upper()}{date:%y%m%d}"
    else:
        return f"l2_data/{date:%Y}/{date:%m}/{date:%d}/{radar_site}/{radar_site[:3].upper()}{date:%y%m%d}"
[3]:
date_query = datetime(2023, 4, 7, 3)
radar_name = "Barrancabermeja"
query = create_query(date=date_query, radar_site=radar_name)
str_bucket = "s3://s3-radaresideam/"
fs = fsspec.filesystem("s3", anon=True)
[4]:
query
[4]:
'l2_data/2023/04/07/Barrancabermeja/BAR23040703'
[5]:
radar_files = sorted(fs.glob(f"{str_bucket}{query}*"))
radar_files[:4]
[5]:
['s3-radaresideam/l2_data/2023/04/07/Barrancabermeja/BAR230407030004.RAW0LZ9',
 's3-radaresideam/l2_data/2023/04/07/Barrancabermeja/BAR230407030107.RAW0LZC',
 's3-radaresideam/l2_data/2023/04/07/Barrancabermeja/BAR230407030238.RAW0LZE',
 's3-radaresideam/l2_data/2023/04/07/Barrancabermeja/BAR230407030315.RAW0LZH']

Let’s check the elevation at each file using xradar.datatree module#

IDEAM radar network operates with a volume scan every five minutes. Each volume scan has four different tasks

  • SURVP “super-resolution” sweep at the lowest elevation angle, usually 0.5 deg, with 720 degrees in azimuth (every 0.5 deg)

  • PRECA task with 1.5, 2.4, 3.0, and 5.0 elevation angles and shorter range than SURVP

  • PRECB task with 6.4 and 8.0 elevation angles and a shorter range than the previous task

  • PRECC task with 10.0, 12.5, and 15.0 with a shorter range than all the previous tasks.

[6]:
# List of first four task files
task_files = [
    fsspec.open_local(
        f"simplecache::s3://{i}", s3={"anon": True}, filecache={"cache_storage": "."}
    )
    for i in radar_files[:4]
]
# list of xradar datatrees
ls_dt = [xd.io.open_iris_datatree(i).xradar.georeference() for i in task_files]

# sweeps and elevations within each task
for i in ls_dt:
    sweeps = list(i.children.keys())
    print(f"task sweeps: {sweeps}")
    for j in sweeps:
        if j.startswith("sweep"):
            print(
                f"{j}: {i[j].sweep_fixed_angle.values: .1f} [deg], {i[j].range.values[-1] / 1e3:.1f} [km]"
            )
    print("----------------------------------------------------------------")
task sweeps: ['sweep_0', 'radar_parameters', 'georeferencing_correction', 'radar_calibration']
sweep_0:  1.3 [deg], 298.9 [km]
----------------------------------------------------------------
task sweeps: ['sweep_0', 'sweep_1', 'sweep_2', 'sweep_3', 'radar_parameters', 'georeferencing_correction', 'radar_calibration']
sweep_0:  1.5 [deg], 224.8 [km]
sweep_1:  2.4 [deg], 224.8 [km]
sweep_2:  3.1 [deg], 224.8 [km]
sweep_3:  5.1 [deg], 224.8 [km]
----------------------------------------------------------------
task sweeps: ['sweep_0', 'sweep_1', 'radar_parameters', 'georeferencing_correction', 'radar_calibration']
sweep_0:  6.4 [deg], 175.0 [km]
sweep_1:  8.0 [deg], 175.0 [km]
----------------------------------------------------------------
task sweeps: ['sweep_0', 'sweep_1', 'sweep_2', 'radar_parameters', 'georeferencing_correction', 'radar_calibration']
sweep_0:  10.0 [deg], 98.9 [km]
sweep_1:  12.5 [deg], 98.9 [km]
sweep_2:  15.0 [deg], 98.9 [km]
----------------------------------------------------------------

Create a single-volume scan#

Let’s use the first four files, tasks SURVP, PRECA, PRECB, PRECC, to create a single volume scan using each task as a datatree. The new volume scan is a tree-like hierarchical object with all four tasks as children.

[7]:
vcp_dt = xr.DataTree(
    name="root",
    children=dict(SURVP=ls_dt[0], PRECA=ls_dt[1], PRECB=ls_dt[2], PRECC=ls_dt[3]),
)
[8]:
vcp_dt.groups
[8]:
('/',
 '/SURVP',
 '/PRECA',
 '/PRECB',
 '/PRECC',
 '/SURVP/sweep_0',
 '/SURVP/radar_parameters',
 '/SURVP/georeferencing_correction',
 '/SURVP/radar_calibration',
 '/PRECA/sweep_0',
 '/PRECA/sweep_1',
 '/PRECA/sweep_2',
 '/PRECA/sweep_3',
 '/PRECA/radar_parameters',
 '/PRECA/georeferencing_correction',
 '/PRECA/radar_calibration',
 '/PRECB/sweep_0',
 '/PRECB/sweep_1',
 '/PRECB/radar_parameters',
 '/PRECB/georeferencing_correction',
 '/PRECB/radar_calibration',
 '/PRECC/sweep_0',
 '/PRECC/sweep_1',
 '/PRECC/sweep_2',
 '/PRECC/radar_parameters',
 '/PRECC/georeferencing_correction',
 '/PRECC/radar_calibration')
[9]:
print(f"Size of data in tree = {vcp_dt.nbytes / 1e6 :.2f} MB")
Size of data in tree = 204.23 MB

PPI plot from the Datatree object#

Now that we have a tree-like hierarchical volume scan object. We can access data at each scan/sweep using dot method vcp_dt.SURVP or dictionary-key method vcp_dt['PRECB']

[10]:
fig, (ax, ax1) = plt.subplots(1, 2, figsize=(10, 4))
# dot method
vcp_dt.SURVP.sweep_0.DBZH.plot(
    x="x", y="y", cmap="ChaseSpectral", vmin=-10, vmax=50, ax=ax
)

ax.set_title(
    f"SURPV sweep_0 ({vcp_dt.SURVP.sweep_0.sweep_fixed_angle.values: .1f} [deg])"
)
m2km = lambda x, _: f"{x/1000:g}"
ax.xaxis.set_major_formatter(m2km)
ax.yaxis.set_major_formatter(m2km)
ax.set_ylabel(r"$North - South \ distance \ [km]$")
ax.set_xlabel(r"$East - West \ distance \ [km]$")

# Dictionary-key method
vcp_dt["PRECB"]["sweep_0"].DBZH.plot(
    x="x", y="y", cmap="ChaseSpectral", vmin=-10, vmax=50, ax=ax1
)

ax1.set_title(
    f"PRECB sweep_0 ({vcp_dt.PRECB.sweep_0.sweep_fixed_angle.values: .1f} [deg])"
)
m2km = lambda x, _: f"{x/1000:g}"
ax1.xaxis.set_major_formatter(m2km)
ax1.yaxis.set_major_formatter(m2km)
ax1.set_xlim(ax.get_xlim())
ax1.set_ylim(ax.get_ylim())
ax1.set_ylabel(r"$North - South \ distance \ [km]$")
ax1.set_xlabel(r"$East - West \ distance \ [km]$")
fig.tight_layout()
../_images/notebooks_multiple-sweeps-into-volume-scan_15_0.png

Multiple volumes scan into one datatree object#

Similarly, we can create a tree-like hierarchical object with multiple volume scans.

[11]:
def data_accessor(file):
    """
    Open AWS S3 file(s), which can be resolved locally by file caching
    """
    return fsspec.open_local(
        f"simplecache::s3://{file}",
        s3={"anon": True},
        filecache={"cache_storage": "./tmp/"},
    )


def create_vcp(ls_dt):
    """
    Creates a tree-like object for each volume scan
    """
    return xr.DataTree(
        name="root",
        children=dict(SURVP=ls_dt[0], PRECA=ls_dt[1], PRECB=ls_dt[2], PRECC=ls_dt[3]),
    )


def mult_vcp(radar_files):
    """
    Creates a tree-like object for multiple volumes scan every 4th file in the bucket
    """
    ls_files = [radar_files[i : i + 4] for i in range(len(radar_files)) if i % 4 == 0]
    ls_sigmet = [
        [xd.io.open_iris_datatree(data_accessor(i)).xradar.georeference() for i in j]
        for j in ls_files
    ]
    ls_dt = [create_vcp(i) for i in ls_sigmet]
    return xr.DataTree.from_dict({f"vcp_{idx}": i for idx, i in enumerate(ls_dt)})
[12]:
# let's test it using the first 24 files in the bucket. We can include more files for visualization. e.g. radar_files[:96]
vcps_dt = mult_vcp(radar_files[:24])

Now we have 6 vcps in one tree-like hierarchical object.

[13]:
list(vcps_dt.keys())
[13]:
['vcp_0', 'vcp_1', 'vcp_2', 'vcp_3', 'vcp_4', 'vcp_5']
[14]:
print(f"Size of data in tree = {vcps_dt.nbytes / 1e9 :.2f} GB")
Size of data in tree = 1.23 GB

PPI animation using the lowest elevation angle#

We can create an animation using the FuncAnimation module from matplotlib package

[15]:
fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
proj_crs = xd.georeference.get_crs(vcps_dt.vcp_1.SURVP)
cart_crs = ccrs.Projection(proj_crs)
sc = vcps_dt.vcp_1.SURVP.sweep_0.DBZH.plot.pcolormesh(
    x="x",
    y="y",
    vmin=-10,
    vmax=50,
    cmap="ChaseSpectral",
    edgecolors="face",
    transform=cart_crs,
    ax=ax,
)

title = f"SURVP - {vcps_dt.vcp_1.SURVP.sweep_0.sweep_fixed_angle.values: .1f} [deg]"
ax.set_title(title)
gl = ax.gridlines(
    crs=ccrs.PlateCarree(),
    draw_labels=True,
    linewidth=1,
    color="gray",
    alpha=0.3,
    linestyle="--",
)
plt.gca().xaxis.set_major_locator(plt.NullLocator())
gl.top_labels = False
gl.right_labels = False
ax.coastlines()


def update_plot(vcp):
    sc.set_array(vcps_dt[vcp].SURVP.sweep_0.DBZH.values.ravel())


ani = FuncAnimation(fig, update_plot, frames=list(vcps_dt.keys()), interval=150)
plt.close()
HTML(ani.to_html5_video())
[15]:

Bonus!!#

Analysis-ready data, cloud-optimized (ARCO) format#

Tree-like hierarchical data can be stored using ARCO format.

[16]:
zarr_store = "./multiple_vcp_test.zarr"
_ = vcps_dt.to_zarr(zarr_store)

ARCO format can be read by using open_datatree module

[17]:
vcps_back = xr.open_datatree(zarr_store, engine="zarr")
[18]:
display(vcps_back)
<xarray.DatasetView> Size: 0B
Dimensions:  ()
Data variables:
    *empty*