Source code for SMBcorr.merra_smb_cumulative

#!/usr/bin/env python
u"""
merra_smb_cumulative.py
Written by Tyler Sutterley (09/2024)
Reads MERRA-2 datafiles to calculate monthly cumulative anomalies
    in derived surface mass balance products

From tavgM_2d_int (Vertically Integrated Diagnostics) collection:
    PRECCU (convective rain)
    PRECLS (large-scale rain)
    PRECSN (snow)
    and EVAP (evaporation)
From tavgM_2d_glc (Land Ice Surface Diagnostics) collection:
    RUNOFF (runoff over glaciated land)

INPUTS:
    SMB: Surface Mass Balance
    ACCUM: Snowfall accumulation
    PRECIP: Total Precipitation
    RAINFALL: Total Rainfall
    SUBLIM: Evaporation and Sublimation
    RUNOFF: Meltwater Runoff

COMMAND LINE OPTIONS:
    -D X, --directory X: working data directory
    -m X, --mean X: Year range for mean
    -M X, --mode X: Permission mode of directories and files
    -V, --verbose: Output information for each output file

PYTHON DEPENDENCIES:
    numpy: Scientific Computing Tools For Python
        https://numpy.org
        https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
    dateutil: powerful extensions to datetime
        https://dateutil.readthedocs.io/en/stable/
    netCDF4: Python interface to the netCDF C library
         https://unidata.github.io/netcdf4-python/netCDF4/index.html
    h5py: Python interface for Hierarchal Data Format 5 (HDF5)
        https://h5py.org

PROGRAM DEPENDENCIES:
    spatial.py: utilities for working with geospatial data
    time.py: utilities for calculating time operations

UPDATE HISTORY:
    Updated 09/2024: use wrapper to importlib for optional dependencies
    Updated 09/2023: using updated spatial functions
    Updated 08/2022: updated docstrings to numpy documentation format
    Updated 10/2021: using python logging for handling verbose output
        add more derived products and include sublimation and condensation
    Updated 02/2021: replaced numpy bool to prevent deprecation warning
        sort files by month as September 2020 was reprocessed
        https://daac.gsfc.nasa.gov/information/alerts
    Updated 01/2021: use argparse to set command line parameters
        using spatial module for read/write operations
        using utilities from time module
    Updated 10/2019: changing Y/N flags to True/False
    Updated 01/2017: can output different data products
    Written 11/2016
"""
from __future__ import print_function

import sys
import os
import re
import logging
import argparse
import warnings
import numpy as np
import SMBcorr.time
import SMBcorr.spatial
import SMBcorr.utilities

# attempt imports
netCDF4 = SMBcorr.utilities.import_dependency('netCDF4')

# PURPOSE: read variables from MERRA-2 tavgM_2d_int and tavgM_2d_glc files
[docs]def read_merra_variables(merra_flux_file, merra_ice_surface_file): """ Read a subset of MERRA-2 variables for deriving surface mass balance products From ``tavgM_2d_int`` (Vertically Integrated Diagnostics) collection: - ``PRECCU``: convective rain - ``PRECLS``: large-scale rain - ``PRECSN``: snow - ``EVAP``: evaporation From ``tavgM_2d_glc`` (Land Ice Surface Diagnostics) collection: - ``RUNOFF``: runoff over glaciated land Parameters ---------- merra_flux_file: str Path to ``tavgM_2d_int`` file merra_ice_surface_file: str Path to ``tavgM_2d_glc`` file """ # python dictionary of output variables dinput = {} # read each variable of interest in MERRA-2 flux file with netCDF4.Dataset(merra_flux_file, 'r') as fid1: # extract geolocation variables dinput['lon'] = fid1.variables['lon'][:].copy() dinput['lat'] = fid1.variables['lat'][:].copy() # convert time from netCDF4 units to Julian Days date_string = fid1.variables['time'].units epoch,to_secs = SMBcorr.time.parse_date_string(date_string) dinput['time'] = SMBcorr.time.convert_delta_time( to_secs*fid1.variables['time'][:],epoch1=epoch, epoch2=(1858,11,17,0,0,0), scale=1.0/86400.0) + 2400000.5 # read each variable of interest in MERRA-2 flux file for key in ['PRECCU','PRECLS','PRECSN','EVAP']: # Getting the data from each NetCDF variable of interest dinput[key] = np.ma.array(fid1.variables[key][:].squeeze(), fill_value=fid1.variables[key]._FillValue) dinput[key].mask = (dinput[key].data == dinput[key].fill_value) # read each variable of interest in MERRA-2 ice surface file with netCDF4.Dataset(merra_ice_surface_file, 'r') as fid2: for key in ['RUNOFF','WESNSC']: # Getting the data from each NetCDF variable of interest dinput[key] = np.ma.array(fid2.variables[key][:].squeeze(), fill_value=fid2.variables[key]._FillValue) dinput[key].mask = (dinput[key].data == dinput[key].fill_value) return dinput
# PURPOSE: read monthly MERRA-2 datasets to calculate cumulative anomalies
[docs]def merra_smb_cumulative(DIRECTORY, PRODUCT, RANGE=None, VERBOSE=False, MODE=0o775): """ Reads MERRA-2 datafiles to calculate monthly cumulative anomalies in derived surface mass balance products Parameters ---------- base_dir: str Working data directory PRODUCT: str MERRA-2 product to calculate - ``SMB``: Surface Mass Balance - ``ACCUM``: Snowfall accumulation - ``PRECIP``: Precipitation - ``RAIN``: Total Rainfall - ``SUBLIM``: Evaporation and Sublimation - ``REFREEZE``: Melt Water Refreeze RANGE: list, default [1961,1990] Start and end year of mean VERBOSE: bool, default False Verbose output of netCDF4 variables MODE: oct, default 0o775 Permission mode of directories and files created """ # create logger for verbosity level loglevel = logging.INFO if VERBOSE else logging.CRITICAL logging.basicConfig(level=loglevel) # MERRA-2 product subdirectories P1 = 'M2TMNXINT.5.12.4' P2 = 'M2TMNXGLC.5.12.4' # MERRA-2 output cumulative subdirectory cumul_sub = '{0}.5.12.4.CUMUL.{1:d}.{2:d}'.format(PRODUCT,*RANGE) # make cumulative subdirectory if not os.access(os.path.join(DIRECTORY,cumul_sub), os.F_OK): os.mkdir(os.path.join(DIRECTORY,cumul_sub), MODE) # regular expression operator to find datafiles (and not the xml files) regex_pattern = 'MERRA2_(\d+).{0}.(\d{{4}})(\d{{2}}).nc4(?!.xml)' # sign for each product to calculate total SMB smb_sign = {'PRECCU':1.0,'PRECLS':1.0,'PRECSN':1.0,'EVAP':-1.0, 'RUNOFF':-1.0,'WESNSC':1.0} # titles for each output data product merra_products = {} merra_products['SMB'] = 'MERRA-2 Surface Mass Balance' merra_products['ACCUM'] = 'MERRA-2 Snowfall accumulation' merra_products['PRECIP'] = 'MERRA-2 Precipitation' merra_products['RAINFALL'] = 'MERRA-2 Rainfall' merra_products['SUBLIM'] = 'MERRA-2 Evaporation and Sublimation' merra_products['RUNOFF'] = 'MERRA-2 Meltwater Runoff' # source of each output data product merra_sources = {} merra_sources['SMB'] = ['PRECCU','PRECLS','PRECSN','EVAP','RUNOFF','WESNSC'] merra_sources['ACCUM'] = ['PRECSN','EVAP'] merra_sources['PRECIP'] = ['PRECCU','PRECLS','PRECSN'] merra_sources['RAINFALL'] = ['PRECCU','PRECLS'] merra_sources['SUBLIM'] = ['EVAP','WESNSC'] merra_sources['RUNOFF'] = ['RUNOFF'] merra_reference = ', '.join(merra_sources[PRODUCT]) # output bad value fill_value = -9999.0 # output dimensions and extents nlat,nlon = (361,576) # read mean data from merra_smb_mean.py MEAN = f'MERRA2.tavgM_2d_{PRODUCT}_mean_Nx.{RANGE[0]:4d}-{RANGE[1]:4d}.nc' mean_file = os.path.join(DIRECTORY, MEAN) merra_mean = SMBcorr.spatial.from_netCDF4(mean_file, varname=PRODUCT) # find years of available data YEARS = sorted([d for d in os.listdir(os.path.join(DIRECTORY,P1)) if re.match('\d{4}',d)]) # check that are years are available CHECK = [str(Y) in YEARS for Y in range(int(YEARS[0]),int(YEARS[-1])+1)] if not np.all(CHECK): raise Exception('Not all years available on file system') # compile regular expression operator for flux product rx = re.compile(regex_pattern.format('tavgM_2d_int_Nx'), re.VERBOSE) # output data and attributes output = {} attrib = {} # latitude attrib['lat'] = {} attrib['lat']['long_name'] = 'Latitude' attrib['lat']['units'] = 'Degrees_North' # longitude attrib['lon'] = {} attrib['lon']['long_name'] = 'Longitude' attrib['lon']['units'] = 'Degrees_East' # output mean of product attrib[PRODUCT] = {} attrib[PRODUCT]['description'] = merra_products[PRODUCT] attrib[PRODUCT]['reference'] = merra_reference attrib[PRODUCT]['model'] = 'MERRA-2' attrib[PRODUCT]['units'] = 'mm w.e.' attrib[PRODUCT]['long_name'] = 'Equivalent_Water_Thickness' attrib[PRODUCT]['_FillValue'] = fill_value # time attrib['time'] = {} attrib['time']['long_name'] = 'Time' attrib['time']['units'] = 'years' # monthly cumulative anomalies # cumulative mass anomalies calculated by removing mean balance flux output['lat'] = np.copy(merra_mean['y']) output['lon'] = np.copy(merra_mean['x']) # output data and mask output[PRODUCT] = np.ma.zeros((nlat,nlon,1), fill_value=fill_value) output[PRODUCT].mask = np.copy(merra_mean['data'].mask) # for each input file for Y in YEARS: # find input files for PRODUCT f=[f for f in os.listdir(os.path.join(DIRECTORY,P1,Y)) if rx.match(f)] # sort files by month indices = np.argsort([rx.match(f1).group(3) for f1 in f]) f = [f[indice] for indice in indices] # days per month in year dpm = SMBcorr.time.calendar_days(int(Y)) # for each monthly file for M,f1 in enumerate(f): # extract parameters from input flux file MOD,Y1,M1 = rx.findall(f1).pop() # corresponding ice surface product file args = (MOD,'tavgM_2d_glc_Nx',Y1,M1) f2 = 'MERRA2_{0}.{1}.{2}{3}.nc4'.format(*args) # full path for flux and ice surface files merra_flux_file = os.path.join(DIRECTORY,P1,Y,f1) merra_ice_surface_file = os.path.join(DIRECTORY,P2,Y,f2) if not os.access(merra_ice_surface_file,os.F_OK): raise Exception('File {0} not in file system'.format(f2)) # read netCDF4 files for variables of interest var = read_merra_variables(merra_flux_file,merra_ice_surface_file) # convert from Julian days to calendar dates YY,MM,DD,hh,mm,ss = SMBcorr.time.convert_julian(var['time'], FORMAT='tuple') # calculate the total seconds in month seconds = dpm[M]*24.0*60.0*60.0 # data for time step dinput = np.ma.zeros((nlat,nlon), fill_value=fill_value) dinput.mask = np.zeros((nlat,nlon), dtype=bool) for p in ['PRECCU','PRECLS','PRECSN','EVAP','RUNOFF','WESNSC']: dinput.mask |= var[p].mask # valid indices for all variables indy,indx = np.nonzero(np.logical_not(dinput.mask)) if (PRODUCT == 'SMB'): # calculate SMB and convert from flux to monthly for p in ['PRECCU','PRECLS','PRECSN','EVAP','RUNOFF','WESNSC']: tmp = var[p][indy,indx]*seconds*smb_sign[p] dinput.data[indy,indx] += tmp elif (PRODUCT == 'ACCUM'): # calculate accumulation and convert from flux to monthly for p in ['PRECSN','EVAP','WESNSC']: tmp = var[p][indy,indx]*seconds*smb_sign[p] dinput.data[indy,indx] += tmp elif (PRODUCT == 'PRECIP'): # calculate precipitation and convert from flux to monthly for p in ['PRECCU','PRECLS','PRECSN']: tmp = var[p][indy,indx]*seconds dinput.data[indy,indx] += tmp elif (PRODUCT == 'RAINFALL'): # calculate rainfall and convert from flux to monthly for p in ['PRECCU','PRECLS']: tmp = var[p][indy,indx]*seconds dinput.data[indy,indx] += tmp elif (PRODUCT == 'SUBLIM'): # calculate sublimation and convert from flux to monthly for p in ['EVAP','WESNSC']: tmp = var[p][indy,indx]*seconds dinput.data[indy,indx] += tmp elif (PRODUCT == 'RUNOFF'): # convert runoff from flux to monthly for p in ['RUNOFF']: tmp = var[p][indy,indx]*seconds dinput.data[indy,indx] += tmp # subtract mean and add to cumulative anomalies output[PRODUCT].data[:,:,:] += np.atleast_3d(dinput.data) - merra_mean['data'] output[PRODUCT].mask[:,:,:] |= np.atleast_3d(dinput.mask) output[PRODUCT].data[output[PRODUCT].mask] = output[PRODUCT].fill_value # calculate time in year decimal output['time'] = SMBcorr.time.convert_calendar_decimal(YY, MM,day=DD,hour=hh,minute=mm,second=ss) # output MERRA-2 cumulative data file FILE = f'MERRA2_{MOD}.tavgM_2d_{PRODUCT}_cumul_Nx.{Y1}{M1}.nc' output_file = os.path.join(DIRECTORY,cumul_sub,FILE) SMBcorr.spatial.to_netCDF4(output, attrib, output_file, data_type='grid') # change the permissions mode os.chmod(output_file, MODE)
# PURPOSE: create argument parser def arguments(): parser = argparse.ArgumentParser( description="""Reads MERRA-2 datafiles to calculate monthly cumulative anomalies in derived surface mass balance products """ ) # command line parameters choices = ['SMB','ACCUM','PRECIP','RAINFALL','SUBLIM','RUNOFF'] parser.add_argument('product', type=str, nargs='+', choices=choices, help='MERRA-2 derived product') # working data directory parser.add_argument('--directory','-D', type=lambda p: os.path.abspath(os.path.expanduser(p)), default=os.getcwd(), help='Working data directory') # start and end years to run for mean parser.add_argument('--mean','-m', metavar=('START','END'), type=int, nargs=2, default=[1980,1995], help='Start and end year range for mean') # print information about each output file parser.add_argument('--verbose','-V', default=False, action='store_true', help='Verbose output of run') # permissions mode of the local directories and files (number in octal) parser.add_argument('--mode','-M', type=lambda x: int(x,base=8), default=0o775, help='Permission mode of directories and files') # return the parser return parser # Main program that calls merra_smb_cumulative() def main(): # Read the system arguments listed after the program parser = arguments() args,_ = parser.parse_known_args() # run program for each input product for PRODUCT in args.product: merra_smb_cumulative(args.directory, PRODUCT, RANGE=args.mean, VERBOSE=args.verbose, MODE=args.mode) # run main program if __name__ == '__main__': main()