Source code for SMBcorr.merra_hybrid_cumulative

#!/usr/bin/env python
u"""
merra_hybrid_cumulative.py
Written by Tyler Sutterley (09/2024)
Reads MERRA-2 hybrid datafiles to calculate cumulative anomalies in
    derived surface mass balance products
MERRA-2 Hybrid model outputs provided by Brooke Medley at GSFC

CALLING SEQUENCE:
    python merra_hybrid_cumulative.py --directory <path> --region gris \
        --mean 1980 1995

COMMAND LINE OPTIONS:
    -D X, --directory X: Working data directory
    -R X, --region X: Region to calculate (gris, ais)
    -v X, --version X: Version of firn model to calculate
        v0
        v1
        v1.0
        v1.1
        v1.2
        v1.2.1
    --mean: Start and end year of mean
    -G, --gzip: netCDF4 file is locally gzip compressed
    -V, --verbose: Output information for each output file
    -M X, --mode X: Local permissions mode of the directories and files

PYTHON DEPENDENCIES:
    numpy: Scientific Computing Tools For Python
        https://numpy.org
        https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
    netCDF4: Python interface to the netCDF C library
         https://unidata.github.io/netcdf4-python/netCDF4/index.html

UPDATE HISTORY:
    Updated 09/2024: use wrapper to importlib for optional dependencies
    Updated 02/2023: new doi for Medley (2022) Cryosphere paper
    Updated 10/2022: add Greenland and Antarctic versions v1.2.1
    Updated 05/2022: use argparse descriptions within sphinx documentation
    Updated 12/2021: added GSFC MERRA-2 Hybrid Greenland v1.2
        can use variable loglevels for verbose output
    Updated 10/2021: using python logging for handling verbose output
    Updated 08/2021: output areas to file if applicable
        add verbose option to print input and output file information
        additionally output surface mass balance anomalies
    Updated 02/2021: using argparse to set parameters
        read and write for all available variables in a file
        added gzip compression option
    Written 10/2019
"""
from __future__ import print_function

import os
import re
import gzip
import time
import uuid
import logging
import argparse
import warnings
import numpy as np
import SMBcorr.utilities

# attempt imports
netCDF4 = SMBcorr.utilities.import_dependency('netCDF4')

# PURPOSE: calculate cumulative anomalies in MERRA-2 hybrid
# surface mass balance variables
[docs]def merra_hybrid_cumulative(base_dir, REGION, VERSION, RANGE=None, GZIP=False, MODE=0o775): """ Calculates cumulative anomalies of MERRA-2 hybrid surface mass balance products Parameters ---------- base_dir: str Working data directory REGION: str MERRA-2 region to interpolate - ``ais``: Antarctica - ``gris``: Greenland VERSION: str MERRA-2 hybrid model version RANGE: list Start and end year for mean GZIP: bool, default False netCDF4 file is gzip compressed VERBOSE: bool, default False Verbose output of netCDF4 variables MODE: oct, default 0o775 Permission mode of directories and files created """ # MERRA-2 hybrid directory DIRECTORY = os.path.join(base_dir,VERSION) # set version parameters suffix = '.gz' if GZIP else '' if (VERSION == 'v0'): # input and output netCDF4 files args = (REGION.lower(),suffix) hybrid_file = 'm2_hybrid_p_minus_e_melt_{0}.nc{1}'.format(*args) output_file = 'm2_hybrid_cumul_{0}.nc{1}'.format(*args) # names of variables to read VARIABLES = ('p_minus_e','melt') AREA = None anomaly_flag = '_anomaly' elif VERSION in ('v1','v1.0'): # input and output netCDF4 files MAJOR_VERSION = re.match(r'((v\d+)(\.\d+)?)$',VERSION).group(2) args = (MAJOR_VERSION,REGION.lower(),suffix) hybrid_file = 'gsfc_fdm_smb_{0}_{1}.nc{2}'.format(*args) output_file = 'gsfc_fdm_smb_cumul_{0}_{1}.nc{2}'.format(*args) # names of variables to read VARIABLES = ('runoff','rainfall','snowfall_minus_sublimation','SMB') AREA = None # flag to append to output netCDF4 variables anomaly_flag = '_anomaly' else: # input and output netCDF4 files FILE_VERSION = VERSION.replace('.','_') args = (FILE_VERSION,REGION.lower(),suffix) firn_height_file = 'gsfc_fdm_{0}_{1}.nc{2}'.format(*args) hybrid_file = 'gsfc_fdm_smb_{0}_{1}.nc{2}'.format(*args) output_file = 'gsfc_fdm_smb_cumul_{0}_{1}.nc{2}'.format(*args) # names of variables to read VARIABLES = ('Me','Ra','Ru','Sn-Ev','SMB') AREA = 'iArea' if REGION.lower() in ('gris',) else None # flag to append to output netCDF4 variables anomaly_flag = '_a' # Open the MERRA-2 Hybrid NetCDF file for reading if GZIP: # read as in-memory (diskless) netCDF4 dataset with gzip.open(os.path.join(DIRECTORY,hybrid_file),'r') as f: fileID = netCDF4.Dataset(uuid.uuid4().hex, memory=f.read()) else: # read netCDF4 dataset fileID = netCDF4.Dataset(os.path.join(DIRECTORY,hybrid_file), 'r') # Output NetCDF file information logging.info(os.path.join(DIRECTORY,hybrid_file)) logging.info(list(fileID.variables.keys())) # Get data and attribute from each netCDF variable fd = {} attrs = {} # input time (year-decimal) fd['time'] = fileID.variables['time'][:].copy() # extract areas from SMB or firn height file if AREA and (AREA in fileID.variables): fd[AREA] = fileID.variables[AREA][:].copy() # get each attribute for area variable if applicable attrs[AREA] = {} for att_name in ['units','long_name','standard_name']: if hasattr(fileID.variables[AREA],att_name): attrs[AREA][att_name]=fileID.variables[AREA].getncattr(att_name) elif AREA: # Open the MERRA-2 Hybrid firn height file for reading if GZIP: # read as in-memory (diskless) netCDF4 dataset with gzip.open(os.path.join(DIRECTORY,firn_height_file),'r') as f: fid1 = netCDF4.Dataset(uuid.uuid4().hex, memory=f.read()) else: # read netCDF4 dataset fid1 = netCDF4.Dataset(os.path.join(DIRECTORY,firn_height_file), 'r') # copy area from firn height file fd[AREA] = fid1.variables[AREA][:].copy() # get each attribute for area variable if applicable attrs[AREA] = {} for att_name in ['units','long_name','standard_name']: if hasattr(fid1.variables[AREA],att_name): attrs[AREA][att_name]=fid1.variables[AREA].getncattr(att_name) # close the firn height file fid1.close() # extract x and y coordinate arrays from grids if applicable # else create meshgrids of coordinate arrays if (np.ndim(fileID.variables['x'][:]) == 2): xg = fileID.variables['x'][:].copy() yg = fileID.variables['y'][:].copy() fd['x'],fd['y'] = (xg[:,0],yg[0,:]) else: fd['x'] = fileID.variables['x'][:].copy() fd['y'] = fileID.variables['y'][:].copy() xg,yg = np.meshgrid(fd['x'],fd['y'],indexing='ij') # time is year decimal at time step 5 days time_step = 5.0/365.25 # calculate mean period for MERRA-2 tt, = np.nonzero((fd['time'] >= RANGE[0]) & (fd['time'] < (RANGE[1]+1))) # for each variable for v in VARIABLES: # copy data and remove singleton dimensions DATA = np.ma.array(fileID.variables[v][:]).squeeze() # invalid data value DATA.fill_value = np.float64(fileID.variables[v]._FillValue) # set masks DATA.mask = (DATA.data == DATA.fill_value) # get each attribute for variable if applicable attrs[v] = {} for att_name in ['units','long_name','standard_name','comment']: if hasattr(fileID.variables[v],att_name): attrs[v][att_name] = fileID.variables[v].getncattr(att_name) # input shape of MERRA-2 Hybrid firn data nt,nx,ny = np.shape(DATA) # cumulative mass anomalies calculated by removing mean balance flux # mean of data for variable (converted from yearly rate) MEAN = np.mean(DATA.data[tt,:,:]*time_step, axis=0) # indices of specified ice mask at the first slice i,j = np.nonzero(~DATA.mask[0,:,:]) valid_count = np.count_nonzero(~DATA.mask[0,:,:]) # allocate for output variable fd[v] = np.ma.zeros((nt,nx,ny),fill_value=DATA.fill_value) fd[v].mask = (DATA.mask | np.isnan(DATA.data)) CUMULATIVE = np.zeros((valid_count)) # calculate output cumulative anomalies for variable for t in range(nt): # convert mass flux from yearly rate and # calculate cumulative anomalies at time t CUMULATIVE += (DATA.data[t,i,j]*time_step - MEAN[i,j]) fd[v].data[t,i,j] = CUMULATIVE.copy() # replace masked values with fill value fd[v].data[fd[v].mask] = fd[v].fill_value # close the NetCDF files fileID.close() # Output NetCDF filename logging.info(os.path.join(DIRECTORY,output_file)) # output MERRA-2 data file with cumulative data if GZIP: # open virtual file object for output fileID = netCDF4.Dataset(uuid.uuid4().hex,'w',memory=True, format='NETCDF4') else: # opening NetCDF file for writing fileID = netCDF4.Dataset(os.path.join(DIRECTORY,output_file),'w', format="NETCDF4") # Defining the NetCDF dimensions fileID.createDimension('x', nx) fileID.createDimension('y', ny) fileID.createDimension('time', nt) # python dictionary with netCDF4 variables nc = {} # defining the NetCDF variables nc['x'] = fileID.createVariable('x', fd['x'].dtype, ('x',)) nc['y'] = fileID.createVariable('y', fd['y'].dtype, ('y',)) nc['time'] = fileID.createVariable('time', fd['time'].dtype, ('time',)) # output area variable if AREA: nc[AREA] = fileID.createVariable(AREA, fd[AREA].dtype, ('x','y',), fill_value=fd[AREA].fill_value, zlib=True) # for each output variable for v in VARIABLES: # append anomaly flag var = '{0}{1}'.format(v,anomaly_flag) nc[v] = fileID.createVariable(var, fd[v].dtype, ('time','x','y',), fill_value=fd[v].fill_value, zlib=True) # filling NetCDF variables for key,val in fd.items(): nc[key][:] = val.copy() # create variable and attributes for projection if REGION in ('gris',): crs = fileID.createVariable('Polar_Stereographic',np.byte,()) crs.standard_name = 'Polar_Stereographic' crs.grid_mapping_name = 'polar_stereographic' crs.straight_vertical_longitude_from_pole = -45.0 crs.latitude_of_projection_origin = 90.0 crs.standard_parallel = 70.0 crs.scale_factor_at_projection_origin = 1. crs.false_easting = 0.0 crs.false_northing = 0.0 crs.semi_major_axis = 6378.137 crs.semi_minor_axis = 6356.752 crs.inverse_flattening = 298.257223563 crs.spatial_epsg = '3413' elif REGION in ('ais',): crs = fileID.createVariable('Polar_Stereographic',np.byte,()) crs.standard_name = 'Polar_Stereographic' crs.grid_mapping_name = 'polar_stereographic' crs.straight_vertical_longitude_from_pole = 0.0 crs.latitude_of_projection_origin = -90.0 crs.standard_parallel = -71.0 crs.scale_factor_at_projection_origin = 1. crs.false_easting = 0.0 crs.false_northing = 0.0 crs.semi_major_axis = 6378.137 crs.semi_minor_axis = 6356.752 crs.inverse_flattening = 298.257223563 crs.spatial_epsg = '3031' # Defining attributes for x and y coordinates nc['x'].long_name = 'Easting' nc['x'].standard_name = 'projection_x_coordinate' nc['x'].grid_mapping = 'Polar_Stereographic' nc['x'].units = 'meters' nc['y'].long_name = 'Northing' nc['y'].standard_name = 'projection_y_coordinate' nc['y'].grid_mapping = 'Polar_Stereographic' nc['y'].units = 'meters' # defining attributes for area variable if AREA: # set area variable attributes for att_name,att_val in attrs[AREA].items(): nc[AREA].setncattr(att_name,att_val) # set grid mapping attribute nc[AREA].setncattr('grid_mapping','Polar_Stereographic') # Defining attributes for variables for v in VARIABLES: # set variable attributes for att_name,att_val in attrs[v].items(): nc[v].setncattr(att_name,att_val.replace(' per year','')) # set grid mapping attribute nc[v].setncattr('grid_mapping','Polar_Stereographic') # Defining attributes for date nc['time'].long_name = 'time, 5-daily resolution' nc['time'].units = 'decimal years, 5-daily resolution' # global attributes of NetCDF file fileID.title = ('Cumulative anomalies in GSFC-FDM{0} variables relative ' 'to {1:4d}-{2:4d}').format(VERSION,RANGE[0],RANGE[1]) fileID.date_created = time.strftime('%Y-%m-%d',time.localtime()) fileID.source = 'version {0}'.format(VERSION) fileID.references = ("Medley, B., Neumann, T. A., Zwally, H. J., " "Smith, B. E., and Stevens, C. M.: Simulations of Firn Processes " "over the Greenland and Antarctic Ice Sheets: 1980--2021, " "The Cryosphere, https://doi.org/10.5194/tc-16-3971-2022, 2022.") fileID.institution = "NASA Goddard Space Flight Center (GSFC)" # Output NetCDF file information logging.info(list(fileID.variables.keys())) # Closing the NetCDF file and getting the buffer object nc_buffer = fileID.close() # write MERRA-2 data file to gzipped file if GZIP: # copy bytes to file with gzip.open(os.path.join(DIRECTORY,output_file), 'wb') as f: f.write(nc_buffer) # change the permissions mode os.chmod(os.path.join(DIRECTORY,output_file), MODE)
# PURPOSE: create argument parser def arguments(): parser = argparse.ArgumentParser( description="""Reads MERRA-2 Hybrid datafiles to calculate cumulative anomalies in surface mass balance products """ ) # command line parameters # working data directory parser.add_argument('--directory','-D', type=lambda p: os.path.abspath(os.path.expanduser(p)), default=os.getcwd(), help='Working data directory') # region of firn model parser.add_argument('--region','-R', type=str, default='gris', choices=['gris','ais'], help='Region of firn model to calculate') # version of firn model versions = ['v0','v1','v1.0','v1.1','v1.2','v1.2.1'] parser.add_argument('--version','-v', type=str, default='v1.2.1', choices=versions, help='Version of firn model to calculate') # start and end years to run for mean parser.add_argument('--mean','-m', metavar=('START','END'), type=int, nargs=2, default=[1980,1995], help='Start and end year range for mean') # netCDF4 files are gzip compressed parser.add_argument('--gzip','-G', default=False, action='store_true', help='netCDF4 file is locally gzip compressed') # print information about each input and output file parser.add_argument('--verbose','-V', action='count', default=0, help='Verbose output of processing run') # permissions mode of the local directories and files (number in octal) parser.add_argument('--mode','-M', type=lambda x: int(x,base=8), default=0o775, help='Permission mode of directories and files') # return the parser return parser # This is the main part of the program that calls the individual functions def main(): # Read the system arguments listed after the program parser = arguments() args,_ = parser.parse_known_args() # create logger loglevels = [logging.CRITICAL, logging.INFO, logging.DEBUG] logging.basicConfig(level=loglevels[args.verbose]) # run program merra_hybrid_cumulative(args.directory, args.region, args.version, RANGE=args.mean, GZIP=args.gzip, MODE=args.mode) # run main program if __name__ == '__main__': main()