Source code for improver.utilities.statistical_operations

# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# (C) British Crown Copyright 2017-2019 Met Office.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Module to contain statistical operations."""

import warnings

import iris
import numpy as np
from iris.exceptions import CoordinateNotFoundError

from improver import BasePlugin
from improver.metadata.probabilistic import find_percentile_coordinate
from improver.utilities.cube_checker import check_cube_coordinates


[docs]class ProbabilitiesFromPercentiles2D(BasePlugin): r""" Generate a 2-dimensional field of probabilities by interpolating a percentiled cube of data to required points. Examples: Given a reference field of values against a percentile coordinate, an interpolation is performed using another field of values of the same type (e.g. height). This returns the percentile with which these heights would be associated in the reference field. This effectively uses the field of values as a 2-dimensional set of thresholds, and the percentiles looked up correspond to the probabilities of these thresholds being reached. Snow-fall level:: Reference field: Percentiled snow fall level (m ASL) Other field: Orography (m ASL) 300m ----------------- 30th Percentile snow fall level 200m ----_------------ 20th Percentile snow fall level 100m ---/-\----------- 10th Percentile snow fall level 000m --/---\---------- 0th Percentile snow fall level ______/ \_________ Orogaphy The orography heights are compared against the heights that correspond with percentile values to find the band in which they fall; this diagram hides the 2-dimensional variability of the snow fall level. The percentile values are then interpolated to the height of the point being considered. This constructs a 2-dimensional field of probabilities that snow will be falling at each point in the orography field. """
[docs] def __init__(self, percentiles_cube, output_name): """ Initialise class. Sets an inverse_ordering (bool) switch to true for cases where the percentiled data increases in the opposite sense to the percentile coordinate: e.g. 0th Percentile - Value = 10 10th Percentile - Value = 5 20th Percentile - Value = 0 Args: percentiles_cube (iris.cube.Cube): The percentiled field from which probabilities will be obtained using the input cube. This cube should contain a percentiles dimension, with fields of values that correspond to these percentiles. The cube passed to the process method will contain values of the same diagnostic (e.g. height) as this reference cube. output_name (str): The name of the cube being created, e.g.'probability_of_snow_falling_level_below_ground_level' """ self.percentile_coordinate = find_percentile_coordinate( percentiles_cube) if self.percentile_coordinate.points.shape[0] < 2: msg = ("Percentile coordinate has only one value. Interpolation " "using ProbabilitiesFromPercentiles2D requires multiple " "values are provided.") raise ValueError(msg) self.percentiles_cube = percentiles_cube self.output_name = output_name # Set inverse_ordering switch percentile_slices = percentiles_cube.slices_over( self.percentile_coordinate) self.inverse_ordering = False first_percentile = percentile_slices.next().data for percentile_values in percentile_slices: last_percentile = percentile_values.data if (first_percentile - last_percentile >= 0).all(): self.inverse_ordering = True
def __repr__(self): """Represent the configured plugin instance as a string.""" result = ('<ProbabilitiesFromPercentiles2D: percentiles_cube: {}, ' 'output_name: {}, inverse_ordering: {}'.format( self.percentiles_cube, self.output_name, self.inverse_ordering)) return result
[docs] def create_probability_cube(self, cube, threshold_cube): """ Create a 2-dimensional probability cube in which to store the calculated probabilities. Args: cube (iris.cube.Cube): Template for the output probability cube. This is a slice created in process, containing a percentile coordinate as well as x and y coordinates. We keep all the metadata from this cube but dispose of the percentile coordinate as we will be filling the cube with probabilities. threshold_cube (iris.cube.Cube): A 2-dimensional cube of "threshold" values containing metadata required to construct a probability cube. Returns: iris.cube.Cube: A new 2-dimensional probability cube with suitable metadata. """ cube_format = next(cube.slices([cube.coord(axis='y'), cube.coord(axis='x')])) probabilities = cube_format.copy(data=np.full(cube_format.shape, np.nan, dtype=float)) try: probabilities.remove_coord(self.percentile_coordinate) except CoordinateNotFoundError: pass probabilities.units = 1 probabilities.rename(self.output_name) probabilities.attributes['thresholded_using'] = threshold_cube.name() probabilities.attributes['relative_to_threshold'] = 'below' if self.inverse_ordering is True: probabilities.attributes['relative_to_threshold'] = 'above' return probabilities
[docs] def percentile_interpolation(self, threshold_cube, percentiles_cube): """ Using a percentiles_cube containing a distinct percentile distribution for each point on a 2-dimensional grid, we can interpolate through each distribution to obtain a probability. The point to which we interpolate is defined by the threshold_cube. Note that the current implementation assumes that in cases of a degenerate percentile distribution, the right most bin in which a threshold value is found is chosen. e.g. :: Percentile: 0 10 20 30 40 50 ... Height (m): 0 0 0 15 30 40 ... A height of 0m will be associated with a probabilty of 20%. This is not correct, but nor is the approach of taking 0%. The percentile approach is not suitable with these degenerate distributions, so be wary of the returned probabilities. Examples: This simple linear interpolator works in the following way. percentiles_cube:: [ [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]], [[4.0, 4.0, 4.0], [4.0, 4.0, 4.0], [4.0, 4.0, 4.0]] ] threshold_cube:: [ [1.0, 1.0, 1.0], [3.0, 3.0, 3.0], [5.0, 5.0, 5.0] ] value_bounds:: [ [[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]], [[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]] ] percentile_bounds:: [ [[-1, -1, -1], [-1, -1, -1], [-1, -1, -1]], [[-1, -1, -1], [-1, -1, -1], [-1, -1, -1]] ] 1. Create slices over each percentile, and using the correct inequality (as determined by inverse_ordering) compare the threshold values to the percentiles slice; here we assume inverse_ordering is False, so we use >=. We then populate the value_bounds and percentile_bounds arrays. Slice 0 - 0th Percentile:: [[1.0 >= 2.0, 1.0 >= 2.0, 1.0 >= 2.0], [3.0 >= 2.0, 3.0 >= 2.0, 3.0 >= 2.0], [5.0 >= 2.0, 5.0 >= 2.0, 5.0 >= 2.0]] [[False, False, False], [True, True, True], [True, True, True]] The value_bounds array has a leading dimensions with 2 indices to be associated with the lower [0] and upper bounds [1] about the threshold being considered. The [0] index is populated with the values in the slice of percentiles_cube at every True index. The [1] index is populated with the values in the next slice of percentiles_cube. :: [ [[np.nan, np.nan, np.nan], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]], [[np.nan, np.nan, np.nan], [4.0, 4.0, 4.0], [4.0, 4.0, 4.0]] ] The percentile_bounds array is also contains a leading dimension associated with lower and upper bounds about the thresholds. The lower bound array is populated at every True index with the current percentile value (0 in this first slice), whilst the upper bound array takes the percentile value from the next slice. :: [ [[-1, -1, -1], [0, 0, 0], [0, 0, 0]], [[-1, -1, -1], [50, 50, 50], [50, 50, 50]] ] After the same process is applied to the next slice, the 50th percentile, we end up with value_bounds:: [ [[np.nan, np.nan, np.nan], [2.0, 2.0, 2.0], [4.0, 4.0, 4.0]], [[np.nan, np.nan, np.nan], [4.0, 4.0, 4.0], [4.0, 4.0, 4.0]] ] And percentile bounds:: [ [[-1, -1, -1], [0, 0, 0], [50, 50, 50]], [[-1, -1, -1], [50, 50, 50], [50, 50, 50]] ] Note that where there is no availble +1 index in the percentiles_cube the upper bound is set to be the same as the lower_bound. 2. When all slices have been interated over, the interpolants are calculated using the threshold values and the values_bounds. :: (threshold_cube.data - lower_bound) / (upper_bound - lower_bound) If the upper_bound and lower_bound are the same this leads to a divide by 0 calculation, resulting in np.inf as the output. 3. The interpolants are used to calculate the percentile value at each point in the array using the percentile_bounds. :: lower_percentile_bound + interpolants * (upper_percentile_bounds - lower_percentile_bounds) The percentiles are divided by 100 to give a fractional probability. 5. Any probabilities that are calculated to be np.inf indicate that the associated point has a threshold value that is above the top percentile band. These points are given a probability value of 1. 4. Any points for which the calculated probability is np.nan had threshold values that were never found to fall within a percentile band, and so must be below the lowest band. These points are given a probability value of 0. Args: threshold_cube (iris.cube.Cube): A 2-dimensional cube of "threshold" values for which it is desired to obtain probability values from the percentiled reference cube. This cube should have the same x and y dimensions as percentiles_cube. percentiles_cube (iris.cube.Cube): A 3-dimensional cube, 1 dimension describing the percentile distributions, and 2-dimensions shared with the threshold_cube, typically x and y. Returns: iris.cube.Cube: A 2-dimensional cube of probabilities obtained by interpolating between percentile values. """ percentiles = self.percentile_coordinate.points probabilities = self.create_probability_cube(percentiles_cube, threshold_cube) # Create array with additional 2 dimensions to contain upper and lower # bounds. array_shape = [2] + list(threshold_cube.shape) percentile_bounds = np.full(array_shape, -1, dtype=np.float32) value_bounds = np.full(array_shape, np.nan, dtype=np.float32) for index, pslice in enumerate(percentiles_cube.slices_over( self.percentile_coordinate)): # Change to use < & > to force degenerate percentile distributions # to use the first percentile band that the threshold falls within. indices = (threshold_cube.data <= pslice.data if self.inverse_ordering else threshold_cube.data >= pslice.data) percentile_bounds[0, indices] = percentiles[index] value_bounds[0, indices] = pslice.data[indices] try: # Usual behaviour where the threshold value falls between # values corresponding to percentiles. percentile_bounds[1, indices] = percentiles[index + 1] value_bounds[1, indices] = percentiles_cube[index+1].data[ indices] except IndexError: # Invoked if we have reached the top of the available values. percentile_bounds[1, indices] = percentiles[index] value_bounds[1, indices] = pslice.data[indices] with np.errstate(divide='ignore', invalid='ignore'): numerator = (threshold_cube.data - value_bounds[0]) denominator = np.diff(value_bounds, n=1, axis=0)[0] interpolants = numerator/denominator interpolants[denominator == 0] = np.inf with np.errstate(invalid='ignore'): probabilities.data, = (percentile_bounds[0] + interpolants * np.diff(percentile_bounds, n=1, axis=0)) probabilities.data = probabilities.data/np.float32(100.) above_top_band = np.isinf(interpolants) below_bottom_band = np.isnan(value_bounds[0]) probabilities.data[below_bottom_band] = 0. probabilities.data[above_top_band] = 1. return probabilities
[docs] def process(self, threshold_cube): """ Slice the percentiles cube over any non-spatial coordinates (realization, time, etc) if present, and call the percentile interpolation method for each resulting cube. Args: threshold_cube (iris.cube.Cube): A cube of values, that effectively behave as thresholds, for which it is desired to obtain probability values from a percentiled reference cube. Returns: iris.cube.Cube: A cube of probabilities obtained by interpolating between percentile values at the "threshold" level. """ cube_slices = self.percentiles_cube.slices( [self.percentile_coordinate, self.percentiles_cube.coord(axis='y'), self.percentiles_cube.coord(axis='x')]) if threshold_cube.ndim != 2: msg = ('threshold cube has too many ({} > 2) dimensions - slicing ' 'to x-y grid'.format(threshold_cube.ndim)) warnings.warn(msg) threshold_cube = next(threshold_cube.slices([ threshold_cube.coord(axis='y'), threshold_cube.coord(axis='x')])) if threshold_cube.units != self.percentiles_cube.units: threshold_cube.convert_units(self.percentiles_cube.units) output_cubes = iris.cube.CubeList() for cube_slice in cube_slices: output_cube = self.percentile_interpolation(threshold_cube, cube_slice) output_cubes.append(output_cube) probability_cube = output_cubes.merge_cube() reference_cube = next(self.percentiles_cube.slices_over( self.percentile_coordinate)) probability_cube = check_cube_coordinates(reference_cube, probability_cube) return probability_cube