Source code for improver.ensemble_copula_coupling.utilities

# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# (C) British Crown Copyright 2017-2019 Met Office.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
This module defines the utilities required for Ensemble Copula Coupling
plugins.

"""
import copy
import random

import cf_units as unit
import iris
import numpy as np
from iris.exceptions import CoordinateNotFoundError

from improver.ensemble_copula_coupling.constants import BOUNDS_FOR_ECDF


def concatenate_2d_array_with_2d_array_endpoints(
        array_2d, low_endpoint, high_endpoint):
    """
    Pad a 2d array with one constant-valued column at each end.

    A column filled with low_endpoint is placed before the first column
    and a column filled with high_endpoint after the last column, i.e.
    the padding happens along the second (index 1) dimension.

    Args:
        array_2d (numpy.ndarray):
            2d array of values.
        low_endpoint (float or int):
            Constant used to fill the column prepended to the array.
        high_endpoint (float or int):
            Constant used to fill the column appended to the array.

    Returns:
        numpy.ndarray:
            2d array with two more columns than the input. The padding
            columns are created with the dtype of array_2d.
    """
    # One padding value per row of the input.
    column_shape = (array_2d.shape[0], 1)
    leading_column = np.full(
        column_shape, low_endpoint, dtype=array_2d.dtype)
    trailing_column = np.full(
        column_shape, high_endpoint, dtype=array_2d.dtype)
    return np.concatenate(
        (leading_column, array_2d, trailing_column), axis=1)
def choose_set_of_percentiles(no_of_percentiles, sampling="quantile"):
    """
    Build a list of percentiles using the requested sampling scheme.

    Args:
        no_of_percentiles (int):
            Number of percentiles to generate.
        sampling (str):
            How the distribution is sampled. Accepted options are:

            * "quantile": equally-spaced percentiles running from
              1/(N+1) to N/(N+1), aimed at dividing a Cumulative
              Distribution Function into blocks of equal probability.
            * "random": percentiles drawn uniformly at random between
              1/(N+1) and N/(N+1) and then sorted into ascending order.

    Returns:
        list of float:
            The percentiles, scaled by 100, produced by the chosen
            sampling scheme.

    Raises:
        ValueError: if the sampling option is not one of the accepted
            options.

    References:
        For further details, Flowerdew, J., 2014.
        Calibrating ensemble reliability whilst preserving spatial structure.
        Tellus, Series A: Dynamic Meteorology and Oceanography, 66(1), pp.1-20.
        Schefzik, R., Thorarinsdottir, T.L. & Gneiting, T., 2013.
        Uncertainty Quantification in Complex Simulation Models Using Ensemble
        Copula Coupling.
        Statistical Science, 28(4), pp.616-640.
    """
    # Reject unknown options before doing any arithmetic.
    if sampling not in ("quantile", "random"):
        raise ValueError(
            "The {} sampling option is not yet implemented.".format(
                sampling))

    # Both schemes work on the interval [1/(N+1), N/(N+1)] (as fractions).
    lowest = 1/float(1+no_of_percentiles)
    highest = no_of_percentiles/float(1+no_of_percentiles)

    if sampling == "quantile":
        fractions = np.linspace(
            lowest, highest, no_of_percentiles).tolist()
    else:
        # Random sampling doesn't currently sample the ends of the
        # distribution i.e. 0 to 1/(N+1) and N/(N+1) to 1.
        draws = np.random.uniform(lowest, highest, no_of_percentiles)
        fractions = sorted(draws)

    # Convert fractions to percentages.
    return [fraction*100 for fraction in fractions]
def create_cube_with_percentiles(percentiles, template_cube, cube_data,
                                 cube_unit=None):
    """
    Build a cube from cube_data with a leading percentile coordinate,
    taking metadata and all other coordinates from a template cube.

    The shape of cube_data should be the shape of the desired output cube.

    Args:
        percentiles (list):
            Ensemble percentiles. There should be the same number of
            percentiles as the first dimension of cube_data.
        template_cube (iris.cube.Cube):
            Cube whose metadata and coordinates are copied. It is expected
            not to contain an existing percentile coordinate.
        cube_data (numpy.ndarray):
            Data for the new cube. Excluding the leading percentile
            dimension, its shape should match template_cube, e.g. a
            template of shape (3, 3, 3) with 10 percentiles needs
            cube_data of shape (10, 3, 3, 3).
        cube_unit (cf_units.Unit):
            If not None, units assigned to the new cube in place of the
            units copied from the template metadata.

    Returns:
        iris.cube.Cube:
            Cube with a percentile coordinate as the zeroth dimension
            coordinate, plus the coordinates and metadata of the
            template cube.
    """
    percentile_coord = iris.coords.DimCoord(
        np.float32(percentiles), long_name='percentile',
        units=unit.Unit("%"), var_name='percentile')

    # Deep copy so the new cube never shares metadata with the template.
    template_metadata = copy.deepcopy(template_cube.metadata._asdict())
    result = iris.cube.Cube(cube_data, **template_metadata)
    if cube_unit is not None:
        result.units = cube_unit

    result.add_dim_coord(percentile_coord, 0)

    # The percentile coordinate occupies dimension 0, so every coordinate
    # copied from the template maps to its original dimension(s) plus one.
    for dim_coord in template_cube.dim_coords:
        (template_dim,) = template_cube.coord_dims(dim_coord)
        result.add_dim_coord(dim_coord.copy(), template_dim + 1)

    # Auxiliary coordinates first, then derived coordinates; both kinds
    # are attached to the result as auxiliary coordinates.
    for coord_group in (template_cube.aux_coords,
                        template_cube.derived_coords):
        for other_coord in coord_group:
            shifted_dims = tuple(
                template_dim + 1
                for template_dim in template_cube.coord_dims(other_coord))
            result.add_aux_coord(other_coord.copy(), shifted_dims)

    return result
def get_bounds_of_distribution(bounds_pairing_key, desired_units):
    """
    Look up the bounds of a distribution and convert them to the
    desired units.

    The bounds values and their units are read from the entry of the
    imported BOUNDS_FOR_ECDF dictionary selected by bounds_pairing_key.

    Args:
        bounds_pairing_key (str):
            Key into the BOUNDS_FOR_ECDF dictionary identifying the
            bounds pairing to use.
        desired_units (cf_units.Unit):
            Units to which the bounds pairing will be converted.

    Returns:
        bounds_pairing (tuple):
            Lower and upper bound to be used as the ends of the
            empirical cumulative distribution function, converted to
            have the desired units.

    Raises:
        KeyError: If the bounds_pairing_key is not within the
            BOUNDS_FOR_ECDF dictionary.
    """
    try:
        bounds_entry = BOUNDS_FOR_ECDF[bounds_pairing_key]
        stored_bounds, stored_units = bounds_entry.value, bounds_entry.units
    except KeyError as err:
        msg = ("The bounds_pairing_key: {} is not recognised "
               "within BOUNDS_FOR_ECDF {}. \n"
               "Error: {}".format(
                   bounds_pairing_key, BOUNDS_FOR_ECDF, err))
        raise KeyError(msg)

    # Convert from the units stored alongside the bounds to those requested.
    source_units = unit.Unit(stored_units)
    return source_units.convert(np.array(stored_bounds), desired_units)
def insert_lower_and_upper_endpoint_to_1d_array(
        array_1d, low_endpoint, high_endpoint):
    """
    Pad a 1d array with a single value at each end.

    Args:
        array_1d (numpy.ndarray):
            1d array of values.
        low_endpoint (float or int):
            Value placed before the first element.
        high_endpoint (float or int):
            Value placed after the last element.

    Returns:
        numpy.ndarray:
            1d array of values padded with the low_endpoint and
            high_endpoint. A float64 result is cast to float32; any
            other dtype is returned as produced by the concatenation.
    """
    padded = np.concatenate(
        (np.array([low_endpoint]), array_1d, np.array([high_endpoint])))
    # Concatenation may promote to float64; float32 is returned instead.
    return padded.astype(np.float32) if padded.dtype == np.float64 else padded
def restore_non_probabilistic_dimensions(
        array_to_reshape, original_cube,
        input_probabilistic_dimension_name,
        output_probabilistic_dimension_length):
    """
    Reshape an array to the dimensions of the original cube, with the
    probabilistic dimension as the first dimension.

    Args:
        array_to_reshape (numpy.ndarray):
            The array that requires reshaping.
        original_cube (iris.cube.Cube):
            Cube supplying the target shape apart from the probabilistic
            dimension, for example,
            [probabilistic_dimension, time, y, x].
        input_probabilistic_dimension_name (str):
            Name of the coordinate on the original cube that represents
            the probabilistic dimension.
        output_probabilistic_dimension_length (int):
            Length of the leading probabilistic dimension in the shape
            that array_to_reshape is reshaped to.

    Returns:
        numpy.ndarray:
            The array after reshaping.

    Raises:
        ValueError: If the probabilistic dimension is a dimension
            coordinate but is not the first on the original_cube.
        CoordinateNotFoundError: If the input_probabilistic_dimension_name
            is not a coordinate on the original_cube.
    """
    coord_name = input_probabilistic_dimension_name
    remaining_shape = list(original_cube.shape)

    if original_cube.coords(coord_name, dim_coords=True):
        if original_cube.coord_dims(coord_name)[0] != 0:
            msg = ("The {} coordinate is a dimension coordinate but is not "
                   "the first dimension coordinate in the cube: {}.\n"
                   "The enforce_coordinate_ordering function may be "
                   "useful. ".format(coord_name, original_cube))
            raise ValueError(msg)
        # Drop the leading probabilistic dimension; its new length is
        # prepended below.
        del remaining_shape[0]
    elif not original_cube.coords(coord_name, dim_coords=False):
        msg = ("A {} coordinate is not available on the {} cube.".format(
            coord_name, original_cube))
        raise CoordinateNotFoundError(msg)
    # Otherwise the coordinate exists but is not a dimension coordinate,
    # so the whole cube shape is kept.

    target_shape = [output_probabilistic_dimension_length] + remaining_shape
    return array_to_reshape.reshape(target_shape)