Source code for SMBcorr.mar_extrap_seasonal

#!/usr/bin/env python
u"""
mar_extrap_seasonal.py
Written by Tyler Sutterley (09/2024)
Interpolates seasonal MAR products to times and coordinates
Seasonal files are climatology files for each day of the year

Uses fast nearest-neighbor search algorithms
https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.BallTree.html
https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KDTree.html
and inverse distance weighted interpolation to extrapolate spatially

INPUTS:
    DIRECTORY: full path to the MAR data directory
        <path_to_mar>/MARv3.11/Greenland/ERA_1958-2019-15km/daily_15km
        <path_to_mar>/MARv3.11/Greenland/NCEP1_1948-2020_20km/daily_20km
        <path_to_mar>/MARv3.10/Greenland/NCEP1_1948-2019_20km/daily_20km
        <path_to_mar>/MARv3.9/Greenland/ERA_1958-2018_10km/daily_10km
    EPSG: projection of input spatial coordinates
    tdec: dates to interpolate in year-decimal
    X: x-coordinates to interpolate in projection EPSG
    Y: y-coordinates to interpolate in projection EPSG

OPTIONS:
    XNAME: x-coordinate variable name in MAR netCDF4 file
    YNAME: x-coordinate variable name in MAR netCDF4 file
    TIMENAME: time variable name in MAR netCDF4 file
    VARIABLE: MAR product to interpolate
    RANGE: start year and end year of seasonal file
    SIGMA: Standard deviation for Gaussian kernel
    SEARCH: nearest-neighbor search algorithm (BallTree or KDTree)
    NN: number of nearest-neighbor points to use
    POWER: inverse distance weighting power
    FILL_VALUE: output fill_value for invalid points

PYTHON DEPENDENCIES:
    numpy: Scientific Computing Tools For Python
        https://numpy.org
        https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
    scipy: Scientific Tools for Python
        https://docs.scipy.org/doc/
    netCDF4: Python interface to the netCDF C library
         https://unidata.github.io/netcdf4-python/netCDF4/index.html
    pyproj: Python interface to PROJ library
        https://pypi.org/project/pyproj/
    scikit-learn: Machine Learning in Python
        https://scikit-learn.org/stable/index.html
        https://github.com/scikit-learn/scikit-learn

UPDATE HISTORY:
    Updated 09/2024: use wrapper to importlib for optional dependencies
    Updated 08/2022: updated docstrings to numpy documentation format
    Updated 01/2021: using conversion protocols following pyproj-2 updates
        https://pyproj4.github.io/pyproj/stable/gotchas.html
    Written 06/2020
"""
from __future__ import print_function

import sys
import os
import re
import numpy as np
import scipy.spatial
import scipy.ndimage
import scipy.interpolate
import SMBcorr.spatial
import SMBcorr.utilities

# attempt imports
netCDF4 = SMBcorr.utilities.import_dependency('netCDF4')
pyproj = SMBcorr.utilities.import_dependency('pyproj')

# PURPOSE: read and interpolate a seasonal field of MAR outputs
[docs]def extrapolate_mar_seasonal(DIRECTORY, EPSG, VERSION, tdec, X, Y, XNAME=None, YNAME=None, TIMENAME='TIME', VARIABLE='SMB', RANGE=[2000,2019], SEARCH='BallTree', NN=10, POWER=2.0, SIGMA=1.5, FILL_VALUE=None): """ Spatially extrapolate daily climatologies of MAR products Parameters ---------- DIRECTORY: str Working data directory EPSG: str or int input coordinate reference system VERSION: str MAR Version - ``v3.5.2`` - ``v3.9`` - ``v3.10`` - ``v3.11`` tdec: float time coordinates to interpolate in year-decimal X: float x-coordinates to interpolate Y: float y-coordinates to interpolate VARIABLE: str, default 'SMB' MAR product to interpolate - ``SMB``: Surface Mass Balance - ``PRECIP``: Precipitation - ``SNOWFALL``: Snowfall - ``RAINFALL``: Rainfall - ``RUNOFF``: Melt Water Runoff - ``SNOWMELT``: Snowmelt - ``REFREEZE``: Melt Water Refreeze - ``SUBLIM``: Sublimation XNAME: str or NoneType, default None Name of the x-coordinate variable YNAME: str or NoneType, default None Name of the y-coordinate variable TIMENAME: str or NoneType, default 'TIME' Name of the time variable RANGE: list Start and end year of seasonal SEARCH: str, default 'BallTree' nearest-neighbor search algorithm NN: int, default 10 number of nearest-neighbor points to use POWER: int or float, default 2.0 Inverse distance weighting power SIGMA: float, default 1.5 Standard deviation for Gaussian kernel FILL_VALUE: float or NoneType, default None Output fill_value for invalid points Default will use fill values from data file """ # regular expression pattern for MAR dataset rx = re.compile('MARseasonal(.*?){0}-{1}.nc$'.format(*RANGE)) # find mar seasonal file for RANGE FILE, = [f for f in os.listdir(DIRECTORY) if rx.match(f)] # Open the MAR NetCDF file for reading with netCDF4.Dataset(os.path.join(DIRECTORY,FILE), 'r') as fileID: nx = len(fileID.variables[XNAME][:]) ny = len(fileID.variables[YNAME][:]) # add 1 to use january 1st as day 366 nt = len(fileID.variables[TIMENAME][:]) + 1 # python dictionary with file variables fd = {} fd['TIME'] = np.arange(nt)/365.0 # create a masked array with all data fd[VARIABLE] = np.ma.zeros((nt,ny,nx),fill_value=FILL_VALUE) fd[VARIABLE].mask = np.zeros((nt,ny,nx),dtype=bool) # python dictionary with gaussian filtered variables gs = {} # use a gaussian filter to smooth each model field gs[VARIABLE] = np.ma.zeros((nt,ny,nx), fill_value=FILL_VALUE) gs[VARIABLE].mask = np.ones((nt,ny,nx), dtype=bool) # calculate cumulative sum of gaussian filtered values cumulative = np.zeros((ny,nx)) gs['CUMULATIVE'] = np.ma.zeros((nt,ny,nx), fill_value=FILL_VALUE) gs['CUMULATIVE'].mask = np.ones((nt,ny,nx), dtype=bool) # Open the MAR NetCDF file for reading with netCDF4.Dataset(os.path.join(DIRECTORY,FILE), 'r') as fileID: # surface type SRF=fileID.variables['SRF'][:] # indices of specified ice mask i,j=np.nonzero(SRF == 4) # ice fraction FRA=fileID.variables['FRA'][:]/100.0 # Get data from netCDF variable and remove singleton dimensions tmp=np.squeeze(fileID.variables[VARIABLE][:]) # combine sectors for multi-layered data if (np.ndim(tmp) == 4): # create mask for combining data MASK=np.zeros((nt,ny,nx)) MASK[:,i,j]=FRA[:,0,i,j] # combine data fd[VARIABLE][:-1,:,:] = MASK*tmp[:,0,:,:] + \ (1.0-MASK)*tmp[:,1,:,:] else: # copy data fd[VARIABLE][:-1,:,:] = tmp.copy() # use january 1st as time 366 fd[VARIABLE][-1,:,:] = np.copy(fd[VARIABLE][0,:,:]) # verify mask object for interpolating data surf_mask = np.broadcast_to(SRF, (nt,ny,nx)) fd[VARIABLE].mask[:,:,:] |= (surf_mask != 4) # combine mask object through time to create a single mask fd['MASK']=1.0-np.any(fd[VARIABLE].mask,axis=0).astype(np.float64) # MAR coordinates fd['LON']=fileID.variables['LON'][:,:].copy() fd['LAT']=fileID.variables['LAT'][:,:].copy() # convert x and y coordinates to meters fd['x']=1000.0*fileID.variables[XNAME][:].copy() fd['y']=1000.0*fileID.variables[YNAME][:].copy() # use a gaussian filter to smooth mask gs['MASK']=scipy.ndimage.gaussian_filter(fd['MASK'],SIGMA, mode='constant',cval=0) # indices of smoothed ice mask ii,jj = np.nonzero(np.ceil(gs['MASK']) == 1.0) # for each time for t in range(nt): # replace fill values before smoothing data temp1 = np.zeros((ny,nx)) i,j = np.nonzero(~fd[VARIABLE].mask[t,:,:]) temp1[i,j] = fd[VARIABLE][t,i,j].copy() # smooth spatial field temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA, mode='constant', cval=0) # scale output smoothed field gs[VARIABLE].data[t,ii,jj] = temp2[ii,jj]/gs['MASK'][ii,jj] # replace valid values with original gs[VARIABLE].data[t,i,j] = temp1[i,j] # set mask variables for time gs[VARIABLE].mask[t,ii,jj] = False # calculate cumulative cumulative[ii,jj] += gs[VARIABLE][t,ii,jj] gs['CUMULATIVE'].data[t,ii,jj] = np.copy(cumulative[ii,jj]) gs['CUMULATIVE'].mask[t,ii,jj] = False # convert MAR latitude and longitude to input coordinates (EPSG) crs1 = pyproj.CRS.from_string(EPSG) crs2 = pyproj.CRS.from_epsg(4326) transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True) direction = pyproj.enums.TransformDirection.INVERSE # convert projection from model coordinates xg,yg = transformer.transform(fd['LON'], fd['LAT'], direction=direction) # construct search tree from original points # can use either BallTree or KDTree algorithms xy1 = np.concatenate((xg[i,j,None],yg[i,j,None]),axis=1) tree = SMBcorr.spatial.build_tree(xy1, SEARCH=SEARCH) # calculate the modulus of the time in year-decimal tmod = tdec % 1 # number of output data points npts = len(tdec) # output interpolated arrays of output variable extrap = np.ma.zeros((npts),fill_value=FILL_VALUE,dtype=np.float64) extrap.mask = np.ones((npts),dtype=bool) # initially set all values to fill value extrap.data[:] = extrap.fill_value # find indices for linearly interpolating in time f = scipy.interpolate.interp1d(fd['TIME'], np.arange(nt), kind='linear') date_indice = f(tmod).astype(np.int64) # for each unique model date # linearly interpolate in time between two model maps # then then inverse distance weighting to extrapolate in space for k in np.unique(date_indice): kk, = np.nonzero(date_indice==k) count = np.count_nonzero(date_indice==k) # query the search tree to find the NN closest points xy2 = np.concatenate((X[kk,None],Y[kk,None]),axis=1) dist,indices = tree.query(xy2, k=NN, return_distance=True) # normalized weights if POWER > 0 (typically between 1 and 3) # in the inverse distance weighting power_inverse_distance = dist**(-POWER) s = np.sum(power_inverse_distance, axis=1) w = power_inverse_distance/np.broadcast_to(s[:,None],(count,NN)) # variable for times before and after tdec var1 = gs['CUMULATIVE'][k,i,j] var2 = gs['CUMULATIVE'][k+1,i,j] # linearly interpolate to date dt = (tmod[kk] - fd['TIME'][k])/(fd['TIME'][k+1] - fd['TIME'][k]) # spatially extrapolate using inverse distance weighting extrap.data[kk] = (1.0-dt)*np.sum(w*var1[indices],axis=1) + \ dt*np.sum(w*var2[indices], axis=1) # complete mask if any invalid in data invalid, = np.nonzero((extrap.data == extrap.fill_value) | np.isnan(extrap.data)) extrap.mask[invalid] = True # return the interpolated values return extrap