Source code for SMBcorr.racmo_extrap_firn_height
#!/usr/bin/env python
u"""
racmo_extrap_firn_height.py
Written by Tyler Sutterley (09/2024)
Spatially extrapolates RACMO firn heights
Uses fast nearest-neighbor search algorithms
https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.BallTree.html
https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KDTree.html
and inverse distance weighted interpolation to extrapolate spatially
INPUTS:
base_dir: working data directory
EPSG: projection of input spatial coordinates
MODEL: model outputs to interpolate
FGRN055: 5.5km Greenland RACMO2.3p2
FGRN11: 11km Greenland RACMO2.3p2
XANT27: 27km Antarctic RACMO2.3p2
ASE055: 5.5km Amundsen Sea Embayment RACMO2.3p2
XPEN055: 5.5km Antarctic Peninsula RACMO2.3p2
tdec: dates to interpolate in year-decimal
X: x-coordinates to interpolate in projection EPSG
Y: y-coordinates to interpolate in projection EPSG
OPTIONS:
SEARCH: nearest-neighbor search algorithm (BallTree or KDTree)
NN: number of nearest-neighbor points to use
POWER: inverse distance weighting power
VARIABLE: RACMO product to interpolate
zs: firn height
FirnAir: firn air content
SIGMA: Standard deviation for Gaussian kernel
FILL_VALUE: output fill_value for invalid points
REFERENCE: calculate firn variables in reference to first field
PYTHON DEPENDENCIES:
numpy: Scientific Computing Tools For Python
https://numpy.org
https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
scipy: Scientific Tools for Python
https://docs.scipy.org/doc/
netCDF4: Python interface to the netCDF C library
https://unidata.github.io/netcdf4-python/netCDF4/index.html
pyproj: Python interface to PROJ library
https://pypi.org/project/pyproj/
scikit-learn: Machine Learning in Python
https://scikit-learn.org/stable/index.html
https://github.com/scikit-learn/scikit-learn
PROGRAM DEPENDENCIES:
regress_model.py: models a time series using least-squares regression
UPDATE HISTORY:
Updated 09/2024: use wrapper to importlib for optional dependencies
Updated 02/2023: don't recompute min and max time cutoffs for cases
Updated 08/2022: updated docstrings to numpy documentation format
Updated 01/2021: using conversion protocols following pyproj-2 updates
https://pyproj4.github.io/pyproj/stable/gotchas.html
Updated 04/2020: reduced to interpolation function. output masked array
Updated 10/2019: Gaussian average firn fields before interpolation
Updated 09/2019: use scipy interpolate to find date indices
Forked 08/2019 from racmo_interp_firn_height.py
Updated 08/2019: convert to model coordinates (rotated pole lat/lon)
and interpolate using N-dimensional functions
added rotation parameters for Antarctic models (XANT27,ASE055,XPEN055)
added option to change the fill value for invalid points
Written 07/2019
"""
from __future__ import print_function
import sys
import os
import re
import warnings
import numpy as np
import scipy.ndimage
import scipy.interpolate
from SMBcorr.regress_model import regress_model
import SMBcorr.spatial
import SMBcorr.utilities
# attempt imports
netCDF4 = SMBcorr.utilities.import_dependency('netCDF4')
pyproj = SMBcorr.utilities.import_dependency('pyproj')
# PURPOSE: read and interpolate RACMO2.3 firn corrections
[docs]def extrapolate_racmo_firn(base_dir, EPSG, MODEL, tdec, X, Y,
VARIABLE='zs', SEARCH='BallTree', NN=10, POWER=2.0, SIGMA=1.5,
FILL_VALUE=None, REFERENCE=False):
"""
Spatially extrapolates RACMO firn heights
Parameters
----------
base_dir: str
Working data directory
EPSG: str or int
input coordinate reference system
MODEL: str
RACMO firn model
- ``FGRN055``: 5.5km Greenland RACMO2.3p2
- ``FGRN11``: 11km Greenland RACMO2.3p2
- ``XANT27``: 27km Antarctic RACMO2.3p2
- ``ASE055``: 5.5km Amundsen Sea Embayment RACMO2.3p2
- ``XPEN055``: 5.5km Antarctic Peninsula RACMO2.3p2
tdec: float
time coordinates to interpolate in year-decimal
X: float
x-coordinates to interpolate
Y: float
y-coordinates to interpolate
VARIABLE: str, default 'zs'
RACMO product to interpolate
- ``zs``: Firn height
- ``FirnAir``: Firn air content
SEARCH: str, default 'BallTree'
nearest-neighbor search algorithm
NN: int, default 10
number of nearest-neighbor points to use
POWER: int or float, default 2.0
Inverse distance weighting power
SIGMA: float, default 1.5
Standard deviation for Gaussian kernel
FILL_VALUE: float or NoneType, default None
Output fill_value for invalid points
Default will use fill values from data file
REFERENCE: bool, default False
Calculate firn variables in reference to first field
"""
# set parameters based on input model
FIRN_FILE = {}
if (MODEL == 'FGRN11'):
# filename and directory for input FGRN11 file
FIRN_FILE['zs'] = 'FDM_zs_FGRN11_1960-2016.nc'
FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN11_1960-2016.nc'
FIRN_DIRECTORY = ['RACMO','FGRN11_1960-2016']
elif (MODEL == 'FGRN055'):
# filename and directory for input FGRN055 file
FIRN_FILE['zs'] = 'FDM_zs_FGRN055_1960-2017_interpol.nc'
FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN055_1960-2017_interpol.nc'
FIRN_DIRECTORY = ['RACMO','FGRN055_1960-2017']
elif (MODEL == 'XANT27'):
# filename and directory for input XANT27 file
FIRN_FILE['zs'] = 'FDM_zs_ANT27_1979-2016.nc'
FIRN_FILE['FirnAir'] = 'FDM_FirnAir_ANT27_1979-2016.nc'
FIRN_DIRECTORY = ['RACMO','XANT27_1979-2016']
elif (MODEL == 'ASE055'):
# filename and directory for input ASE055 file
FIRN_FILE['zs'] = 'FDM_zs_ASE055_1979-2015.nc'
FIRN_FILE['FirnAir'] = 'FDM_FirnAir_ASE055_1979-2015.nc'
FIRN_DIRECTORY = ['RACMO','ASE055_1979-2015']
elif (MODEL == 'XPEN055'):
# filename and directory for input XPEN055 file
FIRN_FILE['zs'] = 'FDM_zs_XPEN055_1979-2016.nc'
FIRN_FILE['FirnAir'] = 'FDM_FirnAir_XPEN055_1979-2016.nc'
FIRN_DIRECTORY = ['RACMO','XPEN055_1979-2016']
# Open the RACMO NetCDF file for reading
ddir = os.path.join(base_dir,*FIRN_DIRECTORY)
fileID = netCDF4.Dataset(os.path.join(ddir,FIRN_FILE[VARIABLE]), 'r')
# Get data from each netCDF variable and remove singleton dimensions
fd = {}
fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
fd['lon'] = fileID.variables['lon'][:,:].copy()
fd['lat'] = fileID.variables['lat'][:,:].copy()
fd['time'] = fileID.variables['time'][:].copy()
# invalid data value
fv = np.float64(fileID.variables[VARIABLE]._FillValue)
# input shape of RACMO firn data
nt,ny,nx = np.shape(fd[VARIABLE])
# close the NetCDF files
fileID.close()
# indices of specified ice mask
i,j = np.nonzero(fd[VARIABLE][0,:,:] != fv)
# use a gaussian filter to smooth mask
gs = {}
gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
mode='constant', cval=0)
# indices of smoothed ice mask
ii,jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
# use a gaussian filter to smooth each firn field
gs[VARIABLE] = np.ma.zeros((nt,ny,nx), fill_value=fv)
gs[VARIABLE].mask = np.ma.zeros((nt,ny,nx), dtype=bool)
for t in range(nt):
# replace fill values before smoothing data
temp1 = np.zeros((ny,nx))
# reference to first firn field
if REFERENCE:
temp1[i,j] = fd[VARIABLE][t,i,j] - fd[VARIABLE][0,i,j]
else:
temp1[i,j] = fd[VARIABLE][t,i,j].copy()
# smooth firn field
temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
mode='constant', cval=0)
# scale output smoothed firn field
gs[VARIABLE][t,ii,jj] = temp2[ii,jj]/gs['mask'][ii,jj]
# replace valid firn values with original
gs[VARIABLE][t,i,j] = temp1[i,j]
# set mask variables for time
gs[VARIABLE].mask[t,:,:] = (gs['mask'] == 0.0)
# convert RACMO latitude and longitude to input coordinates (EPSG)
crs1 = pyproj.CRS.from_string(EPSG)
crs2 = pyproj.CRS.from_epsg(4326)
transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
direction = pyproj.enums.TransformDirection.INVERSE
# convert projection from model coordinates
xg,yg = transformer.transform(fd['lon'], fd['lat'], direction=direction)
# construct search tree from original points
# can use either BallTree or KDTree algorithms
xy1 = np.concatenate((xg[ii,jj,None],yg[ii,jj,None]),axis=1)
tree = SMBcorr.spatial.build_tree(xy1, SEARCH=SEARCH)
# output interpolated arrays of firn variable (height or firn air content)
npts = len(tdec)
extrap_data = np.ma.zeros((npts),fill_value=fv,dtype=np.float64)
extrap_data.data[:] = extrap_data.fill_value
extrap_data.mask = np.zeros((npts),dtype=bool)
# type designating algorithm used (1:interpolate, 2:backward, 3:forward)
extrap_data.interpolation = np.zeros((npts),dtype=np.uint8)
# time cutoff without close time interpolation
time_cutoff = (fd['time'].min(), fd['time'].max())
# find days that can be interpolated
if np.any((tdec >= time_cutoff[0]) & (tdec < time_cutoff[1])):
# indices of dates for interpolated days
ind,=np.nonzero((tdec >= time_cutoff[0]) & (tdec < time_cutoff[1]))
# reduce x, y and t coordinates
xind,yind,tind = (X[ind],Y[ind],tdec[ind])
# find indices for linearly interpolating in time
f = scipy.interpolate.interp1d(fd['time'], np.arange(nt), kind='linear')
date_indice = f(tind).astype(np.int64)
# for each unique firn date
# linearly interpolate in time between two firn maps
# then then inverse distance weighting to extrapolate in space
for k in np.unique(date_indice):
kk, = np.nonzero(date_indice==k)
count = np.count_nonzero(date_indice==k)
# query the search tree to find the NN closest points
xy2 = np.concatenate((xind[kk,None],yind[kk,None]),axis=1)
dist,indices = tree.query(xy2, k=NN, return_distance=True)
# normalized weights if POWER > 0 (typically between 1 and 3)
# in the inverse distance weighting
power_inverse_distance = dist**(-POWER)
s = np.sum(power_inverse_distance, axis=1)
w = power_inverse_distance/np.broadcast_to(s[:,None],(count,NN))
# firn height or air content for times before and after tdec
firn1 = gs[VARIABLE][k,ii,jj]
firn2 = gs[VARIABLE][k+1,ii,jj]
# linearly interpolate to date
dt = (tind[kk] - fd['time'][k])/(fd['time'][k+1] - fd['time'][k])
# spatially extrapolate using inverse distance weighting
extrap_data[kk] = (1.0-dt)*np.sum(w*firn1[indices],axis=1) + \
dt*np.sum(w*firn2[indices], axis=1)
# set interpolation type (1: interpolated in time)
extrap_data.interpolation[ind] = 1
# check if needing to extrapolate backwards in time
count = np.count_nonzero(tdec < time_cutoff[0])
if (count > 0):
# indices of dates before firn model
ind, = np.nonzero(tdec < time_cutoff[0])
# query the search tree to find the NN closest points
xy2 = np.concatenate((X[ind,None],Y[ind,None]),axis=1)
dist,indices = tree.query(xy2, k=NN, return_distance=True)
# normalized weights if POWER > 0 (typically between 1 and 3)
# in the inverse distance weighting
power_inverse_distance = dist**(-POWER)
s = np.sum(power_inverse_distance, axis=1)
w = power_inverse_distance/np.broadcast_to(s[:,None],(count,NN))
# calculate a regression model for calculating values
# read first 10 years of data to create regression model
N = 365
# spatially interpolate firn elevation or air content to coordinates
FIRN = np.zeros((count,N))
T = np.zeros((N))
# create interpolated time series for calculating regression model
for k in range(N):
# time at k
T[k] = gs['time'][k]
# spatially extrapolate firn elevation or air content
firn1 = fd[VARIABLE][k,ii,jj]
FIRN[:,k] = np.sum(w*firn1[indices],axis=1)
# calculate regression model
for n,v in enumerate(ind):
extrap_data[v] = regress_model(T, FIRN[n,:], tdec[v], ORDER=2,
CYCLES=[0.25,0.5,1.0,2.0,4.0,5.0], RELATIVE=T[0])
# set interpolation type (2: extrapolated backwards in time)
extrap_data.interpolation[ind] = 2
# check if needing to extrapolate forward in time
count = np.count_nonzero(tdec >= time_cutoff[1])
if (count > 0):
# indices of dates after firn model
ind, = np.nonzero(tdec >= time_cutoff[1])
# query the search tree to find the NN closest points
xy2 = np.concatenate((X[ind,None],Y[ind,None]),axis=1)
dist,indices = tree.query(xy2, k=NN, return_distance=True)
# normalized weights if POWER > 0 (typically between 1 and 3)
# in the inverse distance weighting
power_inverse_distance = dist**(-POWER)
s = np.sum(power_inverse_distance, axis=1)
w = power_inverse_distance/np.broadcast_to(s[:,None],(count,NN))
# calculate a regression model for calculating values
# read last 10 years of data to create regression model
N = 365
# spatially interpolate firn elevation or air content to coordinates
FIRN = np.zeros((count,N))
T = np.zeros((N))
# create interpolated time series for calculating regression model
for k in range(N):
kk = nt - N + k
# time at k
T[k] = fd['time'][kk]
# spatially extrapolate firn elevation or air content
firn1 = gs[VARIABLE][kk,ii,jj]
FIRN[:,k] = np.sum(w*firn1[indices],axis=1)
# calculate regression model
for n,v in enumerate(ind):
extrap_data[v] = regress_model(T, FIRN[n,:], tdec[v], ORDER=2,
CYCLES=[0.25,0.5,1.0,2.0,4.0,5.0], RELATIVE=T[-1])
# set interpolation type (3: extrapolated forward in time)
extrap_data.interpolation[ind] = 3
# complete mask if any invalid in data
invalid, = np.nonzero(extrap_data.data == extrap_data.fill_value)
extrap_data.mask[invalid] = True
# replace fill value if specified
if FILL_VALUE:
extrap_data.fill_value = FILL_VALUE
extrap_data.data[extrap_data.mask] = extrap_data.fill_value
# return the interpolated values
return extrap_data