Source code for homcloud.interface.histogram

import matplotlib.pyplot as plt

import homcloud.plot_PD as plot_PD
import homcloud.histogram as homhisto
import homcloud.vectorize_PD as vectorize_PD
import homcloud.plot_PD_slice as plot_PD_slice
import homcloud.pdgm as pdgm
from homcloud.delegate import forwardable


@forwardable
class HistoSpec(object):
    """
    This class represents a 2D mesh on the plane of birth-death pairs.

    This class is useful to compute histograms from many diagrams
    with the same setting.

    Args:
        x_range (tuple(float, float) or None): The lower and upper range
            of the bins on x-axis.
        xbins (int): The number of bins on x-axis.
        y_range (tuple(float, float) or None): The lower and upper range
            of the bins on y-axis. Same as `x_range` if None is given.
        ybins (int or None): The number of bins on y-axis.
            Same as `xbins` if None is given.
        pd (:class:`PD` or None): Forwarded unchanged to
            ``homhisto.Ruler.create_xy_rulers``.
            NOTE(review): presumably used to derive the ranges when
            `x_range` is None -- confirm against ``homhisto.Ruler``.
        superlevel (bool): This should be True if your PDs come
            from superlevel filtrations, otherwise this should be False.

    Examples:
        >>> import numpy as np
        >>> import homcloud.interface as hc
        >>> import matplotlib.pyplot as plt
        >>> pc = np.array([[0, 0, 0], [8, 0, 0], [5, 6, 0], [4, 2, 6],])
        >>> pd = hc.PDList.from_alpha_filtration(pc).dth_diagram(1)
        >>> histospec = hc.HistoSpec((0, 20), 128)
        >>> histogram = histospec.pd_histogram(pd)
        >>> histogram.plot(colorbar={"type": "log"}, title="1st PD")
        >>> plt.show()
        -> The histogram is displayed.
    """

    def __init__(self, x_range, xbins, y_range=None, ybins=None, pd=None, superlevel=False):
        rulers = homhisto.Ruler.create_xy_rulers(x_range, xbins, y_range, ybins, pd)
        self.spec = homhisto.HistoSpec.from_rulers(rulers[0], rulers[1], superlevel)
        # Only set to a non-None value by `from_histoinfo`.
        self._histoinfo = None

    # Consumed by the `forwardable` class decorator: `sign_flipped` is
    # looked up on `self.spec`.
    __delegator_definitions__ = {"spec": "sign_flipped"}

    @classmethod
    def from_histoinfo(cls, histoinfo):
        """
        Construct a spec from a histogram-information mapping.

        The ranges are taken from the first and last entries of the
        "x-edges" / "y-edges" sequences and the bin counts are the number
        of edges minus one. `pd` and `superlevel` are left at the
        constructor defaults.

        Args:
            histoinfo (dict): A mapping with "x-edges" and "y-edges" keys,
                each holding an indexable sequence of bin edges.

        Returns:
            An instance of `cls` whose `_histoinfo` attribute refers to
            `histoinfo`.
        """
        x_edges = histoinfo["x-edges"]
        y_edges = histoinfo["y-edges"]
        new_mesh = cls(
            (x_edges[0], x_edges[-1]),
            len(x_edges) - 1,
            (y_edges[0], y_edges[-1]),
            len(y_edges) - 1,
        )
        new_mesh._histoinfo = histoinfo
        return new_mesh

    def pd_histogram(self, pd):
        """Constructs a 2D histogram of `pd`.

        Args:
            pd (:class:`PD`): The diagram.

        Returns:
            :class:`Histogram`: The histogram.
        """
        return Histogram(self.spec.histogram_from_diagram(pd))

    def histogram_from_vector(self, vector):
        """
        Construct a histogram from a vector.

        The histogram is constructed by the rule of persistence image method.

        Args:
            vector (numpy.ndarray): A vector.

        Returns:
            :class:`Histogram`: The histogram.
        """
        return Histogram(self.spec.histogram_from_vector(vector))

    def mask_from_vector(self, vector):
        """
        Construct a mask histogram from a boolean vector.

        The histogram is constructed by the rule of persistence image method.

        Args:
            vector (numpy.ndarray): A boolean vector.

        Returns:
            :class:`MaskHistogram`: The "histogram" whose values in bins are
            True or False.
        """
        return MaskHistogram(self.spec.histogram_from_vector(vector, homhisto.BinaryHistogram))

    def mask_from_2darray(self, array):
        """
        Construct a mask histogram directly from per-bin values.

        Args:
            array: The bin values, handed to ``homhisto.BinaryHistogram``
                together with this spec.
                NOTE(review): presumably a 2D boolean numpy array matching
                the mesh shape -- confirm.

        Returns:
            :class:`MaskHistogram`: The mask wrapping the binary histogram.
        """
        return MaskHistogram(homhisto.BinaryHistogram(array, self.spec))

    def vector_size(self):
        """
        Return the size of the vector generated by self.

        Returns:
            int: The size of the vector
        """
        return self.spec.vector_size()
# `Mesh` is a second name bound to the same class object as `HistoSpec`.
# NOTE(review): presumably retained so code using the older name keeps
# working -- confirm before removing.
Mesh = HistoSpec
class PIVectorizeSpec(HistoSpec):
    """
    This class represents a 2D mesh on the plane of birth-death pairs
    with information about vectorization by persistence image (PI) method.

    You can construct PI vectors from diagrams.

    Args:
        x_range (tuple[float, float] or None): The lower and upper range
            of the bins on x-axis.
        xbins (int): The number of bins on x-axis.
        y_range (tuple[float, float] or None): The lower and upper range
            of the bins on y-axis. Same as `x_range` if None is given.
        ybins (int or None): The number of bins on y-axis.
            Same as `xbins` if None is given.
        weight (tuple or string or Callable[[float, float], float] or None):
            The information of the weight function.
            You can use one of the followings:

            * "none": A constant weight function
            * Callable[[float, float], float]: Any callable object that
              computes weights. The first and second arguments are
              birth and death times of a pair and the return value is a
              positive weight value.
            * ("atan", `c`, `p`): An arctangent weight function,
              `c` and `p` should be floats. The weight function is
              atan(c * (death - birth)**p) .
            * ("linear", `a`): A linear weight function. `a` should be
              a float. The weight function is a*(death - birth) .
            * None (the default): no weight function is stored
              (`self.weight` is None).
              NOTE(review): :meth:`vectorize` hands `self.weight` to
              ``apply_weight`` unchanged -- confirm None is acceptable
              there before relying on the default.

        sigma (float): The standard deviation for the Gaussian distribution
            used by PI.
        superlevel (bool): This should be True if your PDs come
            from superlevel filtrations, otherwise this should be False.
            Internally, this parameter should be True if the PD has
            birth-death pairs with birth > death, and otherwise
            the parameter should be False.

    Raises:
        ValueError: If `weight` is not one of the forms listed above.
    """

    def __init__(self, x_range, xbins, y_range=None, ybins=None, weight=None, sigma=None, superlevel=False):
        super().__init__(x_range, xbins, y_range, ybins, None, superlevel)
        self.weight = self.weight_function(weight)
        self.sigma = sigma

    @staticmethod
    def weight_function(params):
        """
        Translate a weight specification into a weight callable.

        Args:
            params: None, the string "none", a tuple tagged with "atan" or
                "linear", or a callable taking (birth, death).

        Returns:
            The weight callable, or None when `params` is None.

        Raises:
            ValueError: If `params` is none of the supported forms. An
                unsupported non-tuple value used to fall through and
                silently yield None; it is now reported in the same way as
                an unknown tuple tag.
        """
        if params is None:
            # Preserve the historical default: no weight function configured.
            return None
        if isinstance(params, str) and params == "none":
            return lambda b, d: 1.0
        if isinstance(params, tuple):
            if params[0] == "atan":
                return vectorize_PD.atan_weight_function(params[1], params[2])
            if params[0] == "linear":
                # The helper is given the reciprocal of the user-facing `a`.
                return vectorize_PD.linear_weight_function(1.0 / params[1])
            raise ValueError("Unknown weight type: {}".format(params[0]))
        if callable(params):
            return params
        raise ValueError("Unknown weight type: {}".format(params))

    def vectorize(self, pd):
        """Vectorize `pd`.

        The diagram is binned with this spec, the weight function is applied,
        a Gaussian filter with `self.sigma` is applied, and the result is
        flattened into a vector.

        Args:
            pd (:class:`PD`): A persistence diagram. Its `sign_flipped`
                must match this spec's `sign_flipped`.

        Returns:
            numpy.ndarray: The vectorized diagram.
        """
        assert pd.sign_flipped == self.sign_flipped, "sign_flipped of the diagram and the spec must agree"
        histo = self.spec.histogram_from_diagram(pd)
        histo.apply_weight(self.weight)
        histo.apply_gaussian_filter(self.sigma)
        return histo.vectorize()

    def vectorize_pair(self, pair):
        """Vectorize a PD with a single pair.

        Args:
            pair (:class:`Pair` or tuple[double, double]): A birth-death pair

        Returns:
            numpy.ndarray: The vectorized diagram.
        """
        birth_time, death_time = tuple(pair)
        return self.vectorize(pdgm.SimplePDGM(0, [birth_time], [death_time]))
# `PIVectorizerMesh` is a second name bound to the same class object as
# `PIVectorizeSpec`.
# NOTE(review): presumably retained so code using the older name keeps
# working -- confirm before removing.
PIVectorizerMesh = PIVectorizeSpec
@forwardable
class Histogram(object):
    """The class represents a histogram on birth-death plane.

    Methods:
        x_range()
            Returns:
                (tuple[int, int]): The lower and upper range of x-axis.

        x_bins()
            Returns:
                (int): The number of bins on x-axis.

        y_range()
            Returns:
                (tuple[int, int]): The lower and upper range of y-axis.

        y_bins()
            Returns:
                (int): The number of bins on y-axis.

    Attributes:
        xedges (numpy.ndarray[x_bins + 1]): The edges of bins on x-axis
            in ascending order.
        yedges (numpy.ndarray[y_bins + 1]): The edges of bins on y-axis
            in ascending order.
    """

    def __init__(self, orig):
        # `orig` is the underlying histogram object that all values and
        # forwarded members are read from.
        self.orig = orig

    @property
    def values(self):
        """
        (numpy.ndarray, shape (x_bins, y_bins)): 2-dimensional array of
        values in the bins.
        """
        return self.orig.values

    @values.setter
    def values(self, values):
        self.orig.values = values

    # Consumed by the `forwardable` class decorator: these names are
    # looked up on `self.orig`.
    __delegator_definitions__ = {"orig": ["x_range", "y_range", "x_bins", "y_bins", "xedges", "yedges"]}

    def plot(
        self,
        colorbar=None,
        style="colorhistogram",
        title="",
        unit_name=None,
        font_size=None,
        aspect="equal",
        ax=None,
        levels=None,
        plot_ess=False,
    ):
        """Plot a histogram by matplotlib.

        Args:
            colorbar (dict or None): The specification of the histogram
                colors and the colorbar. None (the default) is treated as an
                empty dict. The following fields are available for this
                dictionary.

                * "type" - The name of colorbar type. One of the followings
                  is available. The default type is "linear".

                  * "linear" - linear scale
                  * "log" - log scale
                  * "loglog" - log(log(n+1)+1)
                  * "linear-midpoint" - linear scale with midpoint.
                    You should specify the value of midpoint
                    by the "midpoint" field.
                  * "power" - n^p. You should specify p by "p" field.

                * "min" - The minimum of the colorbar. If this value is not
                  specified, the value is determined by the minimum of
                  histogram values.
                * "max" - The maximum of the colorbar. If this value is not
                  specified, the value is determined by the maximum of
                  histogram values.
                * "colormap" - matplotlib's colormap name

            style (string): The plotting style. "colorhistogram" or "contour"
                is available.
            title (string): The title of the output figure.
            unit_name (string or None): The unit name of the x-axis
                and y-axis.
            font_size (float or None): The font size.
                The font size is automatically determined if None is given.
            aspect (string): The X-Y aspect. "equal" or "auto" is available.
            ax (matplotlib.axes.Axes or None):
                The axes to be plotted on. By default (if None),
                a new figure and axes from `matplotlib.pyplot.subplots()`
                are used.
            levels (list of floats or None):
                The levels for contour plotting. This argument is used
                only if `style` is "contour".
            plot_ess (bool): Essential pairs are plotted if True.

        Raises:
            RuntimeError: If the colorbar "type" is not one of the names
                listed above.

        Example:
            >>> import matplotlib.pyplot as plt
            >>>   :
            >>> histogram.plot(colorbar={"type": "log", "max": 100},
            >>>                title="Title string")
            >>> plt.show()  # To show the histogram on your display
            >>> plt.savefig("histogram.png")  # To save the histogram figure.
        """
        # A fresh dict per call avoids sharing one mutable default object.
        if colorbar is None:
            colorbar = {}
        plotter = plot_PD.PDPlotter.find_plotter(style)(
            self.orig, self._zspec(colorbar), plot_PD.AuxPlotInfo(title, unit_name, font_size, aspect, plot_ess)
        )
        if levels is not None:
            plotter.levels = levels
        plotter.plot(*self._fig_ax(ax))

    @staticmethod
    def _zspec(colorbar):
        """Build the ``plot_PD.ZSpec`` object described by a colorbar dict.

        Raises:
            KeyError: If "midpoint" (for "linear-midpoint") or "p"
                (for "power") is missing.
            RuntimeError: If the "type" value is not recognised.
        """
        ztype = colorbar.get("type", "linear")
        vmax = colorbar.get("max")
        vmin = colorbar.get("min")
        if "colormap" in colorbar:
            colormap = plt.get_cmap(colorbar["colormap"])
        else:
            colormap = None

        if ztype == "linear":
            return plot_PD.ZSpec.Linear(vmax, vmin, colormap)
        elif ztype == "log":
            return plot_PD.ZSpec.Log(vmax, vmin, colormap)
        elif ztype == "loglog":
            return plot_PD.ZSpec.LogLog(vmax, vmin, colormap)
        elif ztype == "linear-midpoint":
            return plot_PD.ZSpec.LinearMidPoint(colorbar["midpoint"], vmax, vmin, colormap)
        elif ztype == "power":
            return plot_PD.ZSpec.Power(colorbar["p"], vmax, vmin, colormap)
        else:
            # Format the message string itself; calling .format on the
            # exception instance raised AttributeError instead.
            raise RuntimeError("unknown colorbar type: {}".format(ztype))

    @staticmethod
    def _fig_ax(ax):
        """Return a (figure, axes) pair, creating both when `ax` is None."""
        if ax is None:
            return plt.subplots()
        else:
            return ax.get_figure(), ax
class MaskHistogram(Histogram):
    """
    A histogram on the birth-death plane whose bin values are booleans.

    Use this class to pick out every birth-death pair that falls inside
    a region. Instances for this purpose can be built with
    :meth:`HistoSpec.mask_from_vector` and
    :meth:`HistoSpec.mask_from_2darray`.
    """

    def filter_pairs(self, pairs):
        """
        Keep only the pairs lying in bins whose value is True.

        Args:
            pairs (sequence of :class:`Pair`): Pairs to be filtered.

        Returns:
            list of :class:`Pair`: The filtered pairs.

        Notes:
            Any object offering `birth_time()` and `death_time()` can be
            filtered, so a list of :class:`OptimalVolume` works as well.
        """
        selected = []
        for candidate in pairs:
            if self.orig.value_at(candidate.birth_time(), candidate.death_time()):
                selected.append(candidate)
        return selected

    def filter_pd(self, pd):
        """
        Keep only the pairs of a persistence diagram lying in bins whose
        value is True.

        Args:
            pd (:class:`PD`): A persistence diagram which has pairs to be
                filtered.

        Returns:
            list of :class:`Pair`: The filtered pairs.
        """
        all_pairs = pd.pairs()
        return self.filter_pairs(all_pairs)
class SliceHistogram(object):
    """
    A 1D histogram of the birth-death pairs inside a thin strip of a
    persistence diagram.

    Instances are created by :meth:`PD.slice_histogram`.

    Attributes:
        values (numpy.ndarray): The histogram values.
        edges (numpy.ndarray): The histogram bin edges.
        x1 (float): The x-coordinate of one end of the strip.
        y1 (float): The y-coordinate of one end of the strip.
        x2 (float): The x-coordinate of the other end of the strip.
        y2 (float): The y-coordinate of the other end of the strip.
    """

    def __init__(self, values, edges, x1, y1, x2, y2):
        self.values, self.edges = values, edges
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2

    def plot(self, left_label=None, right_label=None, logscale=False, ax=None):
        """
        Draw the histogram as a bar chart.

        Args:
            left_label (string): The label text at (x1, y1)
            right_label (string): The label text at (x2, y2)
            logscale (bool): Linear scale is used if False, and
                Log scale is used if True.
            ax (matplotlib.axes.Axes or None):
                The axes to be plotted on. By default (if None),
                `matplotlib.pyplot.gca()` is used.
        """
        if not ax:
            ax = plt.gca()

        # Every bar gets the width of the first bin.
        bin_width = self.edges[1] - self.edges[0]
        left_edges = self.edges[:-1]
        ax.bar(left_edges, self.values, width=bin_width, align="edge", log=logscale)

        # The two ticks at 0 and 1 mark the two ends of the strip.
        ax.set_xlim(-bin_width / 2, 1 + bin_width / 2)
        ax.set_xticks([0, 1])

        label_at_start = plot_PD_slice.construct_label(left_label, self.x1, self.y1)
        label_at_end = plot_PD_slice.construct_label(right_label, self.x2, self.y2)
        ax.set_xticklabels([label_at_start, label_at_end])