Source code for pyunicorn.climate.climate_data

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is part of pyunicorn.
# Copyright (C) 2008--2015 Jonathan F. Donges and pyunicorn authors
# URL: <http://www.pik-potsdam.de/members/donges/software>
# License: BSD (3-clause)

"""
Provides classes for generating and analyzing complex climate networks.
"""

#
#  Import essential packages
#

#  Import NumPy for the array object and fast numerics
import numpy as np
from numpy import random

from .. import Data


#
#  Define class ClimateData
#
class ClimateData(Data):

    """
    Encapsulates spatio-temporal climate data.

    Provides methods to manipulate this data, i.e. calculate daily (monthly)
    mean values and anomaly values.

    Phase means and anomalies are computed lazily and cached; the cache is
    invalidated by :meth:`clear_cache`, :meth:`set_window` and
    :meth:`set_global_window`.

    :ivar str data_source: The name of the data source (model, reanalysis,
        station).
    :ivar int time_cycle: The annual cycle length of the data (units of
        samples).
    """

    #
    #  Defines internal methods
    #

    def __init__(self, observable, grid, time_cycle, anomalies=False,
                 observable_name="", observable_long_name=None, window=None,
                 silence_level=0):
        """
        Initialize an instance of ClimateData.

        The spatio-temporal window is described by the following dictionary::

            window = {"time_min": 0., "time_max": 0.,
                      "lat_min": 0., "lat_max": 0.,
                      "lon_min": 0., "lon_max": 0.}

        :type observable: 2D array [time, index]
        :arg observable: The array of time series to be represented by the
            :class:`.Data` instance.
        :type grid: :class:`.Grid` instance
        :arg grid: The Grid representing the spatial coordinates associated
            to the time series and their temporal sampling.
        :arg int time_cycle: The annual cycle length of the data (units of
            samples).
        :arg bool anomalies: Indicates whether the data are climatological
            anomaly values.
        :arg str observable_name: A short name for the observable.
        :arg str observable_long_name: A long name for the observable.
        :arg dict window: Spatio-temporal window to select a view on the
            data.
        :arg int silence_level: The inverse level of verbosity of the object.
        """
        Data.__init__(self, observable=observable, grid=grid,
                      observable_name=observable_name,
                      observable_long_name=observable_long_name,
                      window=window, silence_level=silence_level)

        #  Set class variables
        #: (number (int)) - The annual cycle length of the data (units of
        #: samples).
        self.time_cycle = time_cycle

        #  Set flags and cache slots. Both cache attributes always exist so
        #  that they can be inspected or reset without AttributeError.
        self._flag_phase_mean = False
        self._phase_mean = None
        self.data_source = ""

        #  If data are anomalies skip automatic calculation of anomalies
        if anomalies:
            self._flag_anomaly = True
            #  The array is stored exactly as passed in.
            self._anomaly = observable
        else:
            self._flag_anomaly = False
            self._anomaly = None

    def __str__(self):
        """
        Returns a string representation.
        """
        return 'ClimateData:\n' + Data.__str__(self)

    def clear_cache(self):
        """
        Clean up cache.

        Is reversible, since all cached information can be recalculated from
        basic data.

        .. note::
           If the instance was created with ``anomalies=True``, the array
           stored at construction is dropped here as well; a later call to
           :meth:`anomaly` recomputes anomalies from :meth:`observable`.
        """
        Data.clear_cache(self)

        #  Drop the references (frees the arrays) but keep the attributes.
        if self._flag_phase_mean:
            self._phase_mean = None
            self._flag_phase_mean = False

        if self._flag_anomaly:
            self._anomaly = None
            self._flag_anomaly = False

    #
    #  Define alternative constructors
    #

    @classmethod
    def Load(cls, file_name, observable_name, time_cycle, time_name="time",
             latitude_name="lat", longitude_name="lon", data_source=None,
             file_type="NetCDF", window=None, vertical_level=None,
             silence_level=0):
        """
        Initialize an instance of ClimateData.

        Supported file types ``file_type`` are:
          - "NetCDF" for regular (rectangular) grids
          - "iNetCDF" for irregular (e.g. geodesic) grids or station data.

        The :index:`spatio-temporal window` is described by the following
        dictionary::

            window = {"time_min": 0., "time_max": 0.,
                      "lat_min": 0., "lat_max": 0.,
                      "lon_min": 0., "lon_max": 0.}

        :arg str file_name: The name of the data file.
        :arg str observable_name: The short name of the observable within
            data file (particularly relevant for NetCDF).
        :arg int time_cycle: The annual cycle length of the data (units of
            samples).
        :arg str time_name: The name of the time variable within data file.
        :arg str latitude_name: The name of the latitude variable within data
            file.
        :arg str longitude_name: The name of longitude variable within data
            file.
        :arg str data_source: The name of the data source (model, reanalysis,
            station).
        :arg str file_type: The format of the data file.
        :arg dict window: Spatio-temporal window to select a view on the
            data.
        :arg int vertical_level: The vertical level to be extracted from the
            data file. Is ignored for horizontal data sets. If None, the
            first level in the data file is chosen.
        :arg int silence_level: The inverse level of verbosity of the object.
        :rtype: instance of ``cls``
        :return: the instance built from the contents of the data file.
        """
        dimension_names = {"time": time_name, "lat": latitude_name,
                           "lon": longitude_name}

        #  Load data using _load_data method from parent class
        res = cls._load_data(file_name=file_name, file_type=file_type,
                             dimension_names=dimension_names,
                             observable_name=observable_name,
                             vertical_level=vertical_level)

        #  Create instance of ClimateData
        data = cls(observable=res["observable"], grid=res["grid"],
                   time_cycle=time_cycle,
                   observable_name=res["observable_name"],
                   observable_long_name=res["observable_long_name"],
                   window=window, silence_level=silence_level)

        #  Set class variables
        data.file_name = file_name
        data.file_type = file_type
        data.vertical_level = vertical_level
        data.data_source = data_source

        return data

    @staticmethod
    def SmallTestData():
        """
        Return test data set of 6 time series with 10 sampling points each.

        The observable and grid are taken from ``Data.SmallTestData()``; the
        annual cycle length is set to 5 samples and ``silence_level`` to 2.

        **Example:**

        >>> r(Data.SmallTestData().observable())
        array([[ 0.    ,  1.    ,  0.    , -1.    , -0.    ,  1.    ],
               [ 0.309 ,  0.9511, -0.309 , -0.9511,  0.309 ,  0.9511],
               [ 0.5878,  0.809 , -0.5878, -0.809 ,  0.5878,  0.809 ],
               [ 0.809 ,  0.5878, -0.809 , -0.5878,  0.809 ,  0.5878],
               [ 0.9511,  0.309 , -0.9511, -0.309 ,  0.9511,  0.309 ],
               [ 1.    ,  0.    , -1.    , -0.    ,  1.    ,  0.    ],
               [ 0.9511, -0.309 , -0.9511,  0.309 ,  0.9511, -0.309 ],
               [ 0.809 , -0.5878, -0.809 ,  0.5878,  0.809 , -0.5878],
               [ 0.5878, -0.809 , -0.5878,  0.809 ,  0.5878, -0.809 ],
               [ 0.309 , -0.9511, -0.309 ,  0.9511,  0.309 , -0.9511]])

        :rtype: ClimateData instance
        :return: a ClimateData instance for testing purposes.
        """
        data = Data.SmallTestData()

        return ClimateData(observable=data.observable(), grid=data.grid,
                           time_cycle=5, silence_level=2)

    #
    #  Define methods to work with (climatological) anomaly data
    #

    def phase_indices(self):
        """
        Return time indices associated to all phases in the annual cycle.

        In other words, provides all time indices falling into a particular
        day, month etc. of the year. Just includes measurements from years
        for which complete data exists.

        .. note::
           Only the currently selected spatio-temporal window is considered.

        **Example:**

        >>> ClimateData.SmallTestData().phase_indices()
        array([[0, 5],
               [1, 6],
               [2, 7],
               [3, 8],
               [4, 9]])

        :rtype: 2D Numpy array (int) [phase index, year]
        :return: the time indices associated to all phases of the annual
                 cycle.
        """
        time_cycle = self.time_cycle

        #  Number of complete annual cycles; a trailing partial cycle is
        #  ignored. Floor division keeps this exact on Python 2 and 3.
        range_years = int(self.grid.grid_size()["time"] // time_cycle)

        #  Entry [phase, year] equals phase + year * time_cycle; build the
        #  whole table with a single broadcast instead of a Python loop.
        phases = np.arange(time_cycle, dtype=int)[:, np.newaxis]
        year_offsets = time_cycle * np.arange(range_years, dtype=int)

        return phases + year_offsets[np.newaxis, :]

    def indices_selected_phases(self, selected_phases):
        """
        Return sorted time indices associated to certain phase indices.

        .. note::
           Only the currently selected spatio-temporal window is considered.

        **Example:**

        >>> ClimateData.SmallTestData().indices_selected_phases([0,1,4])
        array([0, 1, 4, 5, 6, 9])

        :arg [int] selected_phases: The selected phase indices.

        :rtype: 1D array (int)
        :return: the sorted time indices corresponding to chosen phase
                 indices.
        """
        #  Get time indices of all phases of the annual cycle
        phase_indices = self.phase_indices()

        #  Select time indices corresponding to chosen phase indices
        selected_indices = phase_indices[selected_phases, :]

        #  Flatten and sort selected time indices
        selected_indices = selected_indices.flatten()
        selected_indices.sort()

        return selected_indices

    def indices_selected_months(self, selected_months):
        """
        Return sorted time indices associated to certain months.

        Currently, only cycle lengths of 12 (monthly data) and 360
        (standardized daily data) are supported. For a cycle length of 360,
        month ``m`` is mapped to the 30 phases ``30 * m, ..., 30 * m + 29``.

        .. note::
           Only the currently selected spatio-temporal window is considered.

        :arg [number] selected_months: The selected months.

        :rtype: 1D array (int)
        :return: the sorted time indices corresponding to chosen months.
        :raises NotImplementedError: if the cycle length is neither 12 nor
            360.
        """
        if self.time_cycle == 12:
            return self.indices_selected_phases(selected_months)

        if self.time_cycle == 360:
            #  Expand every month into its 30 standardized days
            selected_days = [month * 30 + day
                             for month in selected_months
                             for day in range(30)]
            return self.indices_selected_phases(selected_days)

        raise NotImplementedError("Currently only time cycles 12 and 360 "
                                  "are supported")

    def _calculate_phase_mean(self):
        """
        Calculate mean values of observable for each phase of the annual
        cycle.

        This is also commonly referred to as climatological mean, e.g., the
        mean temperature for all Januaries in the data set for monthly time
        resolution (time_cycle=12).

        .. note::
           Only the currently selected spatio-temporal window is considered.

        :rtype: 2D Numpy array [cycle index, node index]
        :return: the mean values of observable for each phase of the annual
                 cycle.
        """
        if self.silence_level <= 1:
            print("Calculating climatological mean values...")

        #  Get raw data
        observable = self.observable()
        #  Get time cycle
        time_cycle = self.time_cycle

        #  Get number of time series
        N = observable.shape[1]

        #  Initialize
        phase_mean = np.zeros((time_cycle, N))

        #  Calculate mean value for each day (month) on each node. The
        #  strided slice picks every sample belonging to phase i.
        for i in range(time_cycle):
            phase_mean[i, :] = observable[i::time_cycle, :].mean(axis=0)

        return phase_mean

    def phase_mean(self):
        """
        Return mean values of observable for each phase of the annual cycle.

        The result is cached after the first call. For further comments, see
        :meth:`_calculate_phase_mean`.

        .. note::
           Only the currently selected spatio-temporal window is considered.

        **Example:**

        >>> r(ClimateData.SmallTestData().phase_mean())
        array([[ 0.5   ,  0.5   , -0.5   , -0.5   ,  0.5   ,  0.5   ],
               [ 0.63  ,  0.321 , -0.63  , -0.321 ,  0.63  ,  0.321 ],
               [ 0.6984,  0.1106, -0.6984, -0.1106,  0.6984,  0.1106],
               [ 0.6984, -0.1106, -0.6984,  0.1106,  0.6984, -0.1106],
               [ 0.63  , -0.321 , -0.63  ,  0.321 ,  0.63  , -0.321 ]])

        :rtype: 2D Numpy array [cycle index, node index]
        :return: the mean values of observable for each phase of the annual
                 cycle.
        """
        if not self._flag_phase_mean:
            self._phase_mean = self._calculate_phase_mean()
            self._flag_phase_mean = True

        return self._phase_mean

    def _calculate_anomaly(self):
        """
        Calculate anomaly time series from observable.

        To obtain climatological anomaly time series, the climatological
        means are subtracted from each sample in the original time series.
        This procedure is also known as phase averaging.

        .. note::
           Only the currently selected spatio-temporal window is considered.

        :rtype: 2D Numpy array [time, node index]
        :return: the anomalized time series.
        """
        if self.silence_level <= 1:
            print("Calculating daily (monthly) anomaly values...")

        #  Get raw data
        observable = self.observable()
        #  Get time cycle
        time_cycle = self.time_cycle

        #  Initialize array
        anomaly = np.zeros(observable.shape)

        #  Thanks to Jakob Runge
        #  For each phase, subtract that phase's mean from its own samples.
        for i in range(time_cycle):
            sample = observable[i::time_cycle, :]
            anomaly[i::time_cycle, :] = sample - sample.mean(axis=0)

        return anomaly

    def anomaly(self):
        """
        Return anomaly time series from observable.

        The result is cached after the first call. For further comments, see
        :meth:`_calculate_anomaly`.

        .. note::
           Only the currently selected spatio-temporal window is considered.

        **Example:**

        >>> r(ClimateData.SmallTestData().anomaly()[:,0])
        array([-0.5   , -0.321 , -0.1106,  0.1106,  0.321 ,  0.5   ,  0.321 ,
                0.1106, -0.1106, -0.321 ])

        :rtype: 2D Numpy array [time, node index]
        :return: the anomalized time series.
        """
        if not self._flag_anomaly:
            self._anomaly = self._calculate_anomaly()
            self._flag_anomaly = True

        return self._anomaly

    def anomaly_selected_months(self, selected_months):
        """
        Return anomaly time series from observable for selected months.

        For further comments, see :meth:`_calculate_anomaly`. The selected
        time indices are printed only if ``silence_level <= 1``.

        .. note::
           Only the currently selected spatio-temporal window is considered.

        :arg [number] selected_months: The selected months.

        :rtype: 2D array [time, node index]
        :return: the anomalized time series for selected months.
        """
        selected_indices = self.indices_selected_months(selected_months)

        #  Respect the verbosity setting like every other message here.
        if self.silence_level <= 1:
            print(selected_indices)

        return self.anomaly()[selected_indices, :]

    def shuffled_anomaly(self):
        """
        Return the randomly shuffled anomaly time series.

        Each anomaly time series is shuffled individually.

        .. note::
           Only the currently selected spatio-temporal window is considered.

        **Example** (Anomaly with and without temporal shuffling should have
        the same standard deviation along time axis):

        >>> r(ClimateData.SmallTestData().anomaly().std(axis=0))
        array([ 0.31  ,  0.6355,  0.31  ,  0.6355,  0.31  ,  0.6355])
        >>> r(ClimateData.SmallTestData().shuffled_anomaly().std(axis=0))
        array([ 0.31  ,  0.6355,  0.31  ,  0.6355,  0.31  ,  0.6355])

        :rtype: 2D Numpy array [time, node index]
        :return: the anomalized and shuffled time series.
        """
        if self.silence_level <= 1:
            print("Shuffling anomaly time series for significance tests...")

        N = self.grid.grid_size()["space"]

        #  Fetch the (cached) anomaly once instead of once per node
        anomaly = self.anomaly()
        shuffled_anomaly = np.empty(anomaly.shape)

        for i in range(N):
            #  Shuffle a copy so that the cached anomaly stays untouched
            temp = anomaly[:, i].copy()
            random.shuffle(temp)
            shuffled_anomaly[:, i] = temp

        return shuffled_anomaly

    def set_window(self, window):
        """
        Set spatio-temporal window.

        Calls set_window method of parent class Data and additionally sets
        flags, so that measures derived from data (mean, anomaly) will be
        recalculated for new window.

        The spatio-temporal window is described by the following dictionary::

            window = {"time_min": 0., "time_max": 0.,
                      "lat_min": 0., "lat_max": 0.,
                      "lon_min": 0., "lon_max": 0.}

        If the temporal boundaries are equal, the data's full time range is
        selected. If any of the two corresponding spatial boundaries are
        equal, the data's full spatial extension is included. For more
        information see :meth:`pyunicorn.Data.set_window`.

        **Example:**

        >>> data = ClimateData.SmallTestData()
        >>> data.set_window(window={"time_min": 0., "time_max": 0.,
        ...                         "lat_min": 10., "lat_max": 20.,
        ...                         "lon_min": 5., "lon_max": 10.})
        >>> r(data.anomaly())
        array([[ 0.5   , -0.5   ],
               [ 0.321 , -0.63  ],
               [ 0.1106, -0.6984],
               [-0.1106, -0.6984],
               [-0.321 , -0.63  ],
               [-0.5   ,  0.5   ],
               [-0.321 ,  0.63  ],
               [-0.1106,  0.6984],
               [ 0.1106,  0.6984],
               [ 0.321 ,  0.63  ]])

        :type window: dictionary
        :arg window: The spatio-temporal window to select a view on the data.
        """
        Data.set_window(self, window)

        #  Invalidate cached quantities derived from the previous window
        self._flag_phase_mean = False
        self._flag_anomaly = False

    def set_global_window(self):
        """
        Set the view on the whole data set.

        Select the full data set and creates a data array as well as a
        corresponding Grid object to access this window from outside.

        **Example** (Set smaller window and subsequently restore global
        window):

        >>> data = ClimateData.SmallTestData()
        >>> data.set_window(window={"time_min": 0., "time_max": 4.,
        ...                         "lat_min": 10., "lat_max": 20.,
        ...                         "lon_min": 5., "lon_max": 10.})
        >>> data.grid.grid()["lat"]
        array([ 10.,  15.], dtype=float32)
        >>> data.set_global_window()
        >>> data.grid.grid()["lat"]
        array([  0.,   5.,  10.,  15.,  20.,  25.], dtype=float32)
        """
        Data.set_global_window(self)

        #  Invalidate cached quantities derived from the previous window
        self._flag_phase_mean = False
        self._flag_anomaly = False