# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# (C) British Crown Copyright 2017-2019 Met Office.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
This module defines the utilities required for Ensemble Copula Coupling
plugins.
"""
import copy
import random
import cf_units as unit
import iris
import numpy as np
from iris.exceptions import CoordinateNotFoundError
from improver.ensemble_copula_coupling.constants import BOUNDS_FOR_ECDF
[docs]def concatenate_2d_array_with_2d_array_endpoints(
array_2d, low_endpoint, high_endpoint):
"""
For a 2d array, add a 2d array as the lower and upper endpoints.
The concatenation to add the lower and upper endpoints to the 2d array
are performed along the second (index 1) dimension.
Args:
array_2d (numpy.ndarray):
2d array of values
low_endpoint (float or int):
Number used to create a 2d array of a constant value
as the lower endpoint.
high_endpoint (float or int):
Number of used to create a 2d array of a constant value
as the upper endpoint.
Returns:
numpy.ndarray:
2d array of values after padding with the low_endpoint and
high_endpoint.
"""
lower_array = (
np.full((array_2d.shape[0], 1), low_endpoint, dtype=array_2d.dtype))
upper_array = (
np.full((array_2d.shape[0], 1), high_endpoint, dtype=array_2d.dtype))
array_2d = np.concatenate(
(lower_array, array_2d, upper_array), axis=1)
return array_2d
[docs]def choose_set_of_percentiles(no_of_percentiles, sampling="quantile"):
"""
Function to create percentiles.
Args:
no_of_percentiles (int):
Number of percentiles.
sampling (str):
Type of sampling of the distribution to produce a set of
percentiles e.g. quantile or random.
Accepted options for sampling are:
* Quantile: A regular set of equally-spaced percentiles aimed
at dividing a Cumulative Distribution Function into
blocks of equal probability.
* Random: A random set of ordered percentiles.
Returns:
list of float:
Percentiles calculated using the sampling technique specified.
Raises:
ValueError: if the sampling option is not one of the accepted options.
References:
For further details, Flowerdew, J., 2014.
Calibrating ensemble reliability whilst preserving spatial structure.
Tellus, Series A: Dynamic Meteorology and Oceanography, 66(1), pp.1-20.
Schefzik, R., Thorarinsdottir, T.L. & Gneiting, T., 2013.
Uncertainty Quantification in Complex Simulation Models Using Ensemble
Copula Coupling.
Statistical Science, 28(4), pp.616-640.
"""
if sampling in ["quantile"]:
# Generate percentiles from 1/N+1 to N/N+1.
percentiles = np.linspace(
1/float(1+no_of_percentiles),
no_of_percentiles/float(1+no_of_percentiles),
no_of_percentiles).tolist()
elif sampling in ["random"]:
# Generate percentiles from 1/N+1 to N/N+1.
# Random sampling doesn't currently sample the ends of the
# distribution i.e. 0 to 1/N+1 and N/N+1 to 1.
percentiles = np.random.uniform(
1/float(1+no_of_percentiles),
no_of_percentiles/float(1+no_of_percentiles),
no_of_percentiles)
percentiles = sorted(list(percentiles))
else:
msg = "The {} sampling option is not yet implemented.".format(
sampling)
raise ValueError(msg)
return [item*100 for item in percentiles]
[docs]def create_cube_with_percentiles(percentiles, template_cube, cube_data,
cube_unit=None):
"""
Create a cube with a percentile coordinate based on a template cube.
The resulting cube will have an extra percentile coordinate compared with
the template cube. The shape of the cube_data should be the shape of the
desired output cube.
Args:
percentiles (list):
Ensemble percentiles. There should be the same number of
percentiles as the first dimension of cube_data.
template_cube (iris.cube.Cube):
Cube to copy all coordinates from.
The template_cube does not contain any existing percentile
coordinate. Metadata is also copied from this cube.
cube_data (numpy.ndarray):
Data to insert into the template cube.
The shape of the cube_data, excluding the dimension associated with
the percentile coordinate, should be the same as the shape of
template_cube.
For example, template_cube shape is (3, 3, 3), whilst the cube_data
is (10, 3, 3, 3), where there are 10 percentiles.
cube_unit (cf_units.Unit):
The units of the data within the cube
Returns:
iris.cube.Cube:
Cube containing a percentile coordinate as the zeroth dimension
coordinate in addition to the coordinates and metadata from the
template cube.
"""
percentile_coord = iris.coords.DimCoord(
np.float32(percentiles), long_name='percentile',
units=unit.Unit("%"), var_name='percentile')
metadata_dict = copy.deepcopy(template_cube.metadata._asdict())
result = iris.cube.Cube(cube_data, **metadata_dict)
if cube_unit is not None:
result.units = cube_unit
result.add_dim_coord(percentile_coord, 0)
# For the dimension coordinates, the dimensions are incremented by one,
# as the percentile coordinate has been added as the zeroth coordinate.
# The dimension associated with the auxiliary and derived coordinates
# has also been incremented by one.
for coord in template_cube.dim_coords:
dim, = template_cube.coord_dims(coord)
result.add_dim_coord(coord.copy(), dim+1)
for coord in template_cube.aux_coords:
dims = template_cube.coord_dims(coord)
dims = tuple([dim+1 for dim in dims])
result.add_aux_coord(coord.copy(), dims)
for coord in template_cube.derived_coords:
dims = template_cube.coord_dims(coord)
dims = tuple([dim+1 for dim in dims])
result.add_aux_coord(coord.copy(), dims)
return result
[docs]def get_bounds_of_distribution(bounds_pairing_key, desired_units):
"""
Gets the bounds of the distribution and converts the units of the
bounds_pairing to the desired_units.
This method gets the bounds values and units from the imported
dictionaries: BOUNDS_FOR_ECDF and units_of_BOUNDS_FOR_ECDF.
The units of the bounds are converted to be the desired units.
Args:
bounds_pairing_key (str):
Name of key to be used for the BOUNDS_FOR_ECDF dictionary, in order
to get the desired bounds_pairing.
desired_units (cf_units.Unit):
Units to which the bounds_pairing will be converted.
Returns:
bounds_pairing (tuple):
Lower and upper bound to be used as the ends of the
empirical cumulative distribution function, converted to have
the desired units.
Raises:
KeyError: If the bounds_pairing_key is not within the BOUNDS_FOR_ECDF
dictionary.
"""
# Extract bounds from dictionary of constants.
try:
bounds_pairing = BOUNDS_FOR_ECDF[bounds_pairing_key].value
bounds_pairing_units = BOUNDS_FOR_ECDF[bounds_pairing_key].units
except KeyError as err:
msg = ("The bounds_pairing_key: {} is not recognised "
"within BOUNDS_FOR_ECDF {}. \n"
"Error: {}".format(
bounds_pairing_key, BOUNDS_FOR_ECDF, err))
raise KeyError(msg)
bounds_pairing_units = unit.Unit(bounds_pairing_units)
bounds_pairing = bounds_pairing_units.convert(
np.array(bounds_pairing), desired_units)
return bounds_pairing
[docs]def insert_lower_and_upper_endpoint_to_1d_array(
array_1d, low_endpoint, high_endpoint):
"""
For a 1d array, add a lower and upper endpoint.
Args:
array_1d (numpy.ndarray):
1d array of values
low_endpoint (float or int):
Number of use as the lower endpoint.
high_endpoint (float or int):
Number of use as the upper endpoint.
Returns:
numpy.ndarray:
1d array of values padded with the low_endpoint and high_endpoint.
"""
lower_array = np.array([low_endpoint])
upper_array = np.array([high_endpoint])
array_1d = np.concatenate((lower_array, array_1d, upper_array))
if array_1d.dtype == np.float64:
array_1d = array_1d.astype(np.float32)
return array_1d
[docs]def restore_non_probabilistic_dimensions(
array_to_reshape, original_cube, input_probabilistic_dimension_name,
output_probabilistic_dimension_length):
"""
Reshape a 2d array, so that it has the dimensions of the original cube,
whilst ensuring that the probabilistic dimension is the first dimension.
Args:
array_to_reshape (numpy.ndarray):
The array that requires reshaping.
original_cube (iris.cube.Cube):
Cube containing the desired shape to be reshaped to, apart from the
probabilistic dimension, for example,
[probabilistic_dimension, time, y, x].
input_probabilistic_dimension_name (str):
Name of the dimension within the original cube, which represents
the probabilistic dimension.
output_probabilistic_dimension_length (int):
Length of the probabilistic dimension, which will be used to create
the shape to which the array_to_reshape will be reshaped to.
Returns:
numpy.ndarray:
The array after reshaping.
Raises:
ValueError: If the probabilistic dimension is not the first on the
original_cube.
CoordinateNotFoundError: If the input_probabilistic_dimension_name is
not a coordinate on the original_cube.
"""
shape_to_reshape_to = list(original_cube.shape)
if original_cube.coords(
input_probabilistic_dimension_name, dim_coords=True):
if original_cube.coord_dims(
input_probabilistic_dimension_name)[0] == 0:
pat_coord_position = (
original_cube.coord_dims(input_probabilistic_dimension_name))
shape_to_reshape_to.pop(pat_coord_position[0])
else:
msg = ("The {} coordinate is a dimension coordinate but is not "
"the first dimension coordinate in the cube: {}.\n"
"The enforce_coordinate_ordering function may be "
"useful. ".format(
input_probabilistic_dimension_name, original_cube))
raise ValueError(msg)
elif original_cube.coords(
input_probabilistic_dimension_name, dim_coords=False):
pass
else:
msg = ("A {} coordinate is not available on the {} cube.".format(
input_probabilistic_dimension_name, original_cube))
raise CoordinateNotFoundError(msg)
shape_to_reshape_to = (
[output_probabilistic_dimension_length] + shape_to_reshape_to)
return array_to_reshape.reshape(shape_to_reshape_to)