Source code for simplebench.stats.stats

# -*- coding: utf-8 -*-
"""Base benchmark statistics class."""
from __future__ import annotations

import statistics
from math import isclose
from typing import Any, Sequence

from ..exceptions import SimpleBenchKeyError, SimpleBenchTypeError
from ..si_units import si_scale_to_unit, si_unit_base
from ..validators import (
    validate_float,
    validate_non_blank_string,
    validate_non_negative_float,
    validate_positive_float,
    validate_positive_int,
    validate_sequence_of_numbers,
)
from .exceptions.stats import _StatsErrorTag, _StatsSummaryErrorTag


class Stats:
    """Generic container for statistics on a benchmark.

    The raw data points are kept in the order they were supplied. Summary statistics
    are computed lazily on first access and cached on the instance.

    Instances are not hashable because the class defines ``__eq__`` without ``__hash__``.

    :ivar str unit: The unit of measurement for the benchmark (e.g., "ops/s"). (read only)
    :ivar float scale: The scale factor for the interval (e.g. 1 for seconds). (read only)
    :ivar int rounds: The number of rounds each data point represents. (read only)
    :ivar tuple[float | int, ...] data: Tuple of data points. (read only)
    :ivar float mean: The mean of the data points. (read only)
    :ivar float median: The median of the data points. (read only)
    :ivar float minimum: The minimum of the data points. (read only)
    :ivar float maximum: The maximum of the data points. (read only)
    :ivar float standard_deviation: The sample standard deviation of the data points. (read only)
    :ivar float relative_standard_deviation: The standard deviation as a percentage of the
        mean. (read only)
    :ivar tuple[float, ...] percentiles: The 0th through 100th percentiles (101 values). (read only)
    """

    __slots__ = ('_unit', '_scale', '_rounds', '_data', '_percentiles', '_mean', '_median',
                 '_minimum', '_maximum', '_standard_deviation', '_relative_standard_deviation',
                 '_statistics_as_dict', '_statistics_and_data_as_dict')

    def __init__(self, *, unit: str, scale: float, data: Sequence[int | float], rounds: int = 1) -> None:
        """Initialize the Stats object.

        :param str unit: The unit of measurement for the benchmark (e.g., "ops/s").
        :param float scale: The scale factor for the interval (e.g. 1 for seconds).
        :param Sequence[int | float] data: Sequence of data points.
        :param int rounds: The number of rounds each data point represents.
        :raises SimpleBenchTypeError: If any of the arguments are of the wrong type.
        :raises SimpleBenchValueError: If any of the arguments have invalid values.
        """
        self._unit: str = validate_non_blank_string(
            unit, 'unit',
            _StatsErrorTag.INVALID_UNIT_ARG_TYPE,
            _StatsErrorTag.INVALID_UNIT_ARG_VALUE)
        self._scale: float = validate_positive_float(
            scale, 'scale',
            _StatsErrorTag.INVALID_SCALE_ARG_TYPE,
            _StatsErrorTag.INVALID_SCALE_ARG_VALUE)
        self._rounds: int = validate_positive_int(
            rounds, 'rounds',
            _StatsErrorTag.INVALID_ROUNDS_ARG_TYPE,
            _StatsErrorTag.INVALID_ROUNDS_ARG_VALUE)
        # data is left unsorted to allow for time series data to be preserved
        self._data: tuple[int | float, ...] = tuple(validate_sequence_of_numbers(
            value=data,
            field_name='data',
            allow_empty=False,
            type_tag=_StatsErrorTag.INVALID_DATA_ARG_TYPE,
            value_tag=_StatsErrorTag.INVALID_DATA_ARG_ITEM_TYPE))

        # Lazily computed caches; None means "not computed yet".
        self._percentiles: tuple[float, ...] | None = None
        self._mean: float | None = None
        self._median: float | None = None
        self._minimum: float | None = None
        self._maximum: float | None = None
        self._standard_deviation: float | None = None
        self._relative_standard_deviation: float | None = None
        self._statistics_as_dict: dict[str, str | float | dict[int, float] | list[int | float]] | None = None
        self._statistics_and_data_as_dict: dict[str, str | float | dict[int, float] | list[int | float]] | None = None

    @property
    def unit(self) -> str:
        """The unit of the data."""
        return self._unit

    @property
    def scale(self) -> float:
        """The scale of the data."""
        return self._scale

    @property
    def rounds(self) -> int:
        """The number of rounds each data point represents."""
        return self._rounds

    @property
    def data(self) -> tuple[int | float, ...]:
        """The data points."""
        return self._data

    @property
    def mean(self) -> float:
        """The mean of the data (always a float, even for integer data)."""
        if self._mean is None:
            # statistics.mean() can return an int for all-integer data; coerce so the
            # declared float type holds, as minimum/maximum already do.
            self._mean = float(statistics.mean(self.data)) if self.data else 0.0
        return self._mean

    @property
    def median(self) -> float:
        """The median of the data (always a float, even for integer data)."""
        if self._median is None:
            # statistics.median() returns the middle element unchanged for odd-length
            # integer data; coerce for the same reason as mean.
            self._median = float(statistics.median(self.data)) if self.data else 0.0
        return self._median

    @property
    def minimum(self) -> float:
        """The minimum of the data."""
        if self._minimum is None:
            self._minimum = float(min(self.data)) if self.data else 0.0
        return self._minimum

    @property
    def maximum(self) -> float:
        """The maximum of the data."""
        if self._maximum is None:
            self._maximum = float(max(self.data)) if self.data else 0.0
        return self._maximum

    @property
    def standard_deviation(self) -> float:
        """The sample standard deviation of the data (0.0 for fewer than two points)."""
        if self._standard_deviation is None:
            self._standard_deviation = statistics.stdev(self.data) if len(self.data) > 1 else 0.0
        return self._standard_deviation

    @property
    def relative_standard_deviation(self) -> float:
        """The standard deviation as a percentage of the mean (0.0 when the mean is zero)."""
        if self._relative_standard_deviation is None:
            self._relative_standard_deviation = (
                abs(self.standard_deviation / self.mean * 100) if self.mean else 0.0)
        return self._relative_standard_deviation

    @property
    def percentiles(self) -> tuple[float, ...]:
        """Percentiles of the data.

        Returns the 0th through 100th percentiles of the data as an immutable tuple of
        101 values, where index ``p`` holds the ``p``-th percentile.
        """
        if self._percentiles is None:
            self._percentiles = self._calculate_percentiles()
        return self._percentiles

    def _calculate_percentiles(self) -> tuple[float, ...]:
        """Calculate the 0th through 100th percentiles of the data.

        ``statistics.quantiles(..., n=100, method='inclusive')`` yields the 99 interior
        cut points (1st through 99th percentile). The inclusive method treats the
        smallest data point as the 0th percentile and the largest as the 100th, so those
        two values are added at the ends to complete the 101-element result.

        A single data point cannot be interpolated, so it is repeated for every
        percentile.

        :return: A tuple of 101 floats indexed by percent (0 to 100).
        """
        values = self.data
        if len(values) == 1:
            return (float(values[0]),) * 101
        interior = statistics.quantiles(values, n=100, method='inclusive')
        return (float(min(values)), *(float(cut) for cut in interior), float(max(values)))

    @property
    def as_dict(self) -> dict[str, str | float | dict[int, float] | tuple[int | float, ...]]:
        """Returns the statistics and data as a JSON-serializable dictionary.

        The dictionary is the one produced by ``StatsSummary.as_dict`` for this object,
        with the ``'type'`` entry replaced by ``'<ClassName>:statistics'`` and a
        ``'data'`` entry added that holds every raw data point divided by the scale.

        A fresh dictionary is built on every access, so callers may mutate the result.

        :return: A dictionary containing the statistics and the scaled data points.
        """
        stats = self.stats_summary.as_dict
        stats['type'] = f'{self.__class__.__name__}:statistics'
        stats['data'] = tuple(value / self.scale for value in self.data)
        return stats

    @property
    def stats_summary(self) -> StatsSummary:
        """Returns a StatsSummary object created from this Stats object.

        :return: A StatsSummary object containing the same statistics as this Stats object.
        """
        return StatsSummary.from_stats(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Stats:
        """Construct a Stats object from a dictionary.

        Example:

        .. code-block:: python

            stats_dict = {
                "unit": "ops/s",
                "scale": 1,
                "rounds": 1,
                "data": [1000, 2000, 1500, 3000, 2500]
            }
            stats = Stats.from_dict(stats_dict)
            print(stats.mean)  # Output: 2000.0

        :param dict data: A dictionary containing the stats data. It must contain the
            keys 'unit', 'scale', 'rounds' and 'data'; their values are passed unchanged
            to the constructor.
        :return: A Stats object constructed from the provided dictionary.
        :raises SimpleBenchTypeError: If data is not a dictionary, or if any of the values
            are of the wrong type.
        :raises SimpleBenchKeyError: If any of the 'unit', 'scale', 'rounds' or 'data' keys
            is missing from the dictionary.
        :raises SimpleBenchValueError: If any of the values are rejected by the constructor.
        """
        if not isinstance(data, dict):
            raise SimpleBenchTypeError('The data argument must be a dictionary.',
                                       tag=_StatsErrorTag.FROM_DICT_INVALID_DATA_ARG_TYPE)

        # Checked in this order so the first missing key reported is stable.
        required_keys = (
            ('unit', _StatsErrorTag.FROM_DICT_MISSING_UNIT_KEY),
            ('scale', _StatsErrorTag.FROM_DICT_MISSING_SCALE_KEY),
            ('rounds', _StatsErrorTag.FROM_DICT_MISSING_ROUNDS_KEY),
            ('data', _StatsErrorTag.FROM_DICT_MISSING_DATA_KEY),
        )
        for key, missing_tag in required_keys:
            if key not in data:
                raise SimpleBenchKeyError(f'The data dictionary is missing the required "{key}" key.',
                                          tag=missing_tag)

        return cls(unit=data['unit'],
                   scale=data['scale'],
                   rounds=data['rounds'],
                   data=data['data'])  # type: ignore[arg-type]

    def __eq__(self, other: object) -> bool:
        """Compare this Stats object to another Stats or StatsSummary object.

        Equality is based on the statistics and not on object identity. Two objects are
        equal when their units share the same SI base unit, their rounds match, the ratio
        of their scales agrees with the conversion factor between their units, and every
        statistic (including each percentile) matches within the default ``math.isclose``
        tolerance after adjusting for the scale ratio.

        The raw data points are not compared, because a StatsSummary never carries them.

        :param object other: The other object to compare against.
        :return: True if the objects are considered equal, False otherwise.
            ``NotImplemented`` if other is neither a Stats nor a StatsSummary.
        """
        if not isinstance(other, (Stats, StatsSummary)):
            return NotImplemented

        # this handles scale differences between two Stats objects
        self_base_unit: str = si_unit_base(self.unit)
        other_base_unit: str = si_unit_base(other.unit)
        if self_base_unit != other_base_unit:
            return False

        scale_by: float = si_scale_to_unit(base_unit=self_base_unit,
                                           current_unit=other.unit,
                                           target_unit=self.unit)
        relative_scale: float = self.scale / other.scale

        if self.rounds != other.rounds:
            return False
        if not isclose(scale_by, relative_scale):
            return False

        if not (isclose(self.mean, other.mean / relative_scale) and
                isclose(self.median, other.median / relative_scale) and
                isclose(self.minimum, other.minimum / relative_scale) and
                isclose(self.maximum, other.maximum / relative_scale) and
                isclose(self.standard_deviation, other.standard_deviation / relative_scale) and
                # A percentage of the mean is scale-independent, so it is compared as-is.
                isclose(self.relative_standard_deviation, other.relative_standard_deviation)):
            return False

        if len(self.percentiles) != len(other.percentiles):
            return False
        for self_pct, other_pct in zip(self.percentiles, other.percentiles):
            if not isclose(self_pct, other_pct / relative_scale):
                return False

        # The base units were already confirmed equal above.
        return True

    def __repr__(self) -> str:
        """Return a constructor-style representation including every data point."""
        return (f"{self.__class__.__name__}(unit='{self.unit}', scale={self.scale}, rounds={self.rounds}, "
                f"data=[{', '.join(str(d) for d in self.data)}])")
class StatsSummary:
    """Precomputed summary statistics for a benchmark, without the raw data points.

    This is the lightweight counterpart of the Stats class: it stores values that
    were already calculated, which makes it suitable for serialization or reporting
    when the raw data points are not needed.

    :ivar str unit: The unit of measurement for the benchmark (e.g., "ops/s"). (read only)
    :ivar float scale: The scale factor for the interval (e.g. 1 for seconds). (read only)
    :ivar int rounds: The number of rounds each data point represents. (read only)
    :ivar float mean: The mean of the data. (read only)
    :ivar float median: The median of the data. (read only)
    :ivar float minimum: The minimum of the data. (read only)
    :ivar float maximum: The maximum of the data. (read only)
    :ivar float standard_deviation: The standard deviation of the data. (read only)
    :ivar float relative_standard_deviation: The relative standard deviation of the data. (read only)
    :ivar tuple[float, ...] percentiles: Percentiles of the data. (read only)
    :ivar tuple[int | float, ...] data: Always an empty tuple, because a StatsSummary does not
        contain raw data points. (read only)
    """

    __slots__ = ('_unit', '_scale', '_rounds', '_mean', '_median', '_minimum', '_maximum',
                 '_standard_deviation', '_relative_standard_deviation', '_percentiles',
                 '_statistics_as_dict')

    def __init__(self,  # pylint: disable=too-many-arguments
                 *,
                 unit: str,
                 scale: float,
                 rounds: int,
                 mean: float,
                 median: float,
                 minimum: float,
                 maximum: float,
                 standard_deviation: float,
                 relative_standard_deviation: float,
                 percentiles: tuple[float, ...]):
        """Validate and store the supplied summary statistics.

        :param str unit: The unit of measurement for the data (e.g., "ops/s").
        :param float scale: The scale factor for the data (e.g. 1.0 for seconds).
        :param int rounds: The number of rounds each data point represents.
        :param float mean: The mean data point.
        :param float median: The median data point.
        :param float minimum: The minimum data point.
        :param float maximum: The maximum data point.
        :param float standard_deviation: The standard deviation of the data.
        :param float relative_standard_deviation: The relative standard deviation of the data.
        :param tuple[float, ...] percentiles: Percentiles of the data; stored as a tuple.
        :raises SimpleBenchTypeError: If any of the arguments are of the wrong type.
        :raises SimpleBenchValueError: If any of the arguments have invalid values.
        """
        tags = _StatsSummaryErrorTag

        # The arguments are validated in declaration order, so the first invalid one
        # is the one that gets reported.
        self._unit = validate_non_blank_string(
            unit, 'unit', tags.INVALID_UNIT_ARG_TYPE, tags.INVALID_UNIT_ARG_VALUE)
        self._scale = validate_positive_float(
            scale, 'scale', tags.INVALID_SCALE_ARG_TYPE, tags.INVALID_SCALE_ARG_VALUE)
        self._rounds = validate_positive_int(
            rounds, 'rounds', tags.INVALID_ROUNDS_ARG_TYPE, tags.INVALID_ROUNDS_ARG_VALUE)
        self._mean = validate_float(mean, 'mean', tags.INVALID_MEAN_ARG_TYPE)
        self._median = validate_float(median, 'median', tags.INVALID_MEDIAN_ARG_TYPE)
        self._minimum = validate_float(minimum, 'minimum', tags.INVALID_MINIMUM_ARG_TYPE)
        self._maximum = validate_float(maximum, 'maximum', tags.INVALID_MAXIMUM_ARG_TYPE)
        self._standard_deviation = validate_non_negative_float(
            standard_deviation, 'standard_deviation',
            tags.INVALID_STANDARD_DEVIATION_ARG_TYPE,
            tags.INVALID_STANDARD_DEVIATION_ARG_VALUE)
        self._relative_standard_deviation = validate_non_negative_float(
            relative_standard_deviation, 'relative_standard_deviation',
            tags.INVALID_RELATIVE_STANDARD_DEVIATION_ARG_TYPE,
            tags.INVALID_RELATIVE_STANDARD_DEVIATION_ARG_VALUE)
        validated_percentiles = validate_sequence_of_numbers(
            percentiles, 'percentiles',
            allow_empty=False,
            type_tag=tags.INVALID_PERCENTILES_ARG_TYPE,
            value_tag=tags.INVALID_PERCENTILES_ARG_VALUE)
        self._percentiles = tuple(validated_percentiles)
        self._statistics_as_dict = None

    @property
    def unit(self) -> str:
        """The unit of the data."""
        return self._unit

    @property
    def scale(self) -> float:
        """The scale of the data."""
        return self._scale

    @property
    def rounds(self) -> int:
        """The number of rounds each data point represents."""
        return self._rounds

    @property
    def mean(self) -> float:
        """The mean of the data."""
        return self._mean

    @property
    def median(self) -> float:
        """The median of the data."""
        return self._median

    @property
    def minimum(self) -> float:
        """The minimum of the data."""
        return self._minimum

    @property
    def maximum(self) -> float:
        """The maximum of the data."""
        return self._maximum

    @property
    def standard_deviation(self) -> float:
        """The standard deviation of the data."""
        return self._standard_deviation

    @property
    def relative_standard_deviation(self) -> float:
        """The relative standard deviation of the data."""
        return self._relative_standard_deviation

    @property
    def percentiles(self) -> tuple[float, ...]:
        """Percentiles of the data."""
        return self._percentiles

    @property
    def data(self) -> tuple[int | float, ...]:
        """The data points.

        Always an empty tuple: a StatsSummary does not contain raw data points.
        """
        return tuple()

    def __eq__(self, other: object) -> bool:
        """Compare this StatsSummary to another Stats or StatsSummary object.

        Equality is based on the statistics and not on object identity. The units must
        share the same SI base unit, the rounds must match, the ratio of the two scales
        must agree with the conversion factor between the two units, and every statistic
        (including each percentile) must match within the default ``math.isclose``
        tolerance after adjusting for the scale ratio.

        :param object other: The other object to compare against.
        :return: True if the objects are considered equal, False otherwise.
            ``NotImplemented`` if other is neither a Stats nor a StatsSummary.
        """
        if not isinstance(other, (Stats, StatsSummary)):
            return NotImplemented

        base_unit: str = si_unit_base(self.unit)
        if base_unit != si_unit_base(other.unit):
            return False

        unit_ratio: float = si_scale_to_unit(base_unit=base_unit,
                                             current_unit=other.unit,
                                             target_unit=self.unit)
        scale_ratio: float = self.scale / other.scale
        if self.rounds != other.rounds or not isclose(unit_ratio, scale_ratio):
            return False

        # These statistics carry the scale, so the other side is rescaled first.
        # Attributes are read lazily so evaluation stops at the first mismatch.
        scaled_fields = ('mean', 'median', 'minimum', 'maximum', 'standard_deviation')
        if not all(isclose(getattr(self, name), getattr(other, name) / scale_ratio)
                   for name in scaled_fields):
            return False
        # The relative standard deviation is compared without rescaling.
        if not isclose(self.relative_standard_deviation, other.relative_standard_deviation):
            return False

        own_percentiles = self.percentiles
        other_percentiles = other.percentiles
        if len(own_percentiles) != len(other_percentiles):
            return False
        return all(isclose(own, theirs / scale_ratio)
                   for own, theirs in zip(own_percentiles, other_percentiles))

    @classmethod
    def from_stats(cls, stats: Stats) -> StatsSummary:
        """Construct a new StatsSummary object from a Stats object.

        :param Stats stats: The Stats object to derive the summary from.
        :return: A new StatsSummary holding the statistics read from the Stats object.
        :raises SimpleBenchTypeError: If the stats argument is not a Stats object.
        """
        if not isinstance(stats, Stats):
            raise SimpleBenchTypeError(
                "The stats argument must be a Stats object.",
                tag=_StatsSummaryErrorTag.FROM_STATS_INVALID_STATS_ARG_TYPE)

        copied_fields = (
            'unit', 'scale', 'rounds', 'mean', 'median', 'minimum', 'maximum',
            'standard_deviation', 'relative_standard_deviation', 'percentiles',
        )
        return cls(**{name: getattr(stats, name) for name in copied_fields})

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> StatsSummary:
        """Construct a StatsSummary object from a dictionary.

        Example:

        .. code-block:: python

            stats_summary_dict = {
                "unit": "ops/s",
                "scale": 1.0,
                "rounds": 1,
                "mean": 2000.0,
                "median": 2000.0,
                "minimum": 1000.0,
                "maximum": 3000.0,
                "standard_deviation": 790.5694150420949,
                "relative_standard_deviation": 39.52847075252201,
                "percentiles": [1000.0, 1300.0, 1600.0, 1900.0, 2200.0, 2500.0, 2800.0, 3000.0, 3000.0]
            }
            stats_summary = StatsSummary.from_dict(stats_summary_dict)
            print(stats_summary.mean)  # Output: 2000.0

        :param dict data: A dictionary containing the summary statistics. It must contain
            the keys 'unit', 'scale', 'rounds', 'mean', 'median', 'minimum', 'maximum',
            'standard_deviation', 'relative_standard_deviation' and 'percentiles'. Any other
            keys are ignored.
        :return: A StatsSummary object constructed from the provided dictionary.
        :raises SimpleBenchTypeError: If data is not a dictionary, or if any of the values
            are of the wrong type.
        :raises SimpleBenchKeyError: If any of the required keys is missing.
        :raises SimpleBenchValueError: If any of the values are rejected by the constructor.
        """
        if not isinstance(data, dict):
            # NOTE(review): this uses a _StatsErrorTag member rather than a
            # _StatsSummaryErrorTag one - confirm whether that is intentional.
            raise SimpleBenchTypeError('The data argument must be a dictionary.',
                                       tag=_StatsErrorTag.FROM_DICT_INVALID_DATA_ARG_TYPE)

        required_keys = (
            'unit', 'scale', 'rounds', 'mean', 'median', 'minimum', 'maximum',
            'standard_deviation', 'relative_standard_deviation', 'percentiles',
        )
        # Report the first required key (in declaration order) that is absent.
        first_missing = next((key for key in required_keys if key not in data), None)
        if first_missing is not None:
            raise SimpleBenchKeyError(f"The data dictionary is missing the required '{first_missing}' key.",
                                      tag=_StatsSummaryErrorTag.FROM_DICT_MISSING_KEY)

        constructor_kwargs = {key: data[key] for key in required_keys}
        return cls(**constructor_kwargs)  # type: ignore[arg-type]  # pylint: disable=missing-kwoa

    @property
    def as_dict(self) -> dict[str, str | float | dict[int, float] | tuple[int | float, ...]]:
        """Returns the statistics as a JSON-serializable dictionary.

        The mean, median, minimum, maximum, standard deviation and every percentile are
        divided by the scale. The scale is reported as 1.0 and the unit as its SI base
        unit. The rounds and the relative standard deviation are reported unchanged.

        Raw data points are not included. A fresh dictionary is built on every access,
        so callers may mutate the result.

        :return: A dictionary containing the statistics.
        """
        divisor = self.scale
        summary = {
            'type': f'{self.__class__.__name__}',
            'unit': si_unit_base(self.unit),
            'scale': 1.0,
            'rounds': self.rounds,
        }
        for name in ('mean', 'median', 'minimum', 'maximum', 'standard_deviation'):
            summary[name] = getattr(self, name) / divisor
        summary['relative_standard_deviation'] = self.relative_standard_deviation
        summary['percentiles'] = tuple(value / divisor for value in self.percentiles)
        return summary

    def __repr__(self) -> str:
        """Return a constructor-style representation listing every stored statistic."""
        scalar_fields = (
            'scale', 'rounds', 'mean', 'median', 'minimum', 'maximum',
            'standard_deviation', 'relative_standard_deviation',
        )
        scalars = ', '.join(f"{name}={getattr(self, name)}" for name in scalar_fields)
        percentile_list = ', '.join(map(str, self.percentiles))
        return f"{self.__class__.__name__}(unit='{self.unit}', {scalars}, percentiles=[{percentile_list}])"