Source code for anomalydetection.backend.engine.robust_z_engine

# -*- coding:utf-8 -*- #
#
# Anomaly Detection Framework
# Copyright (C) 2018 Bluekiri BigData Team <bigdata@bluekiri.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import numpy as np
import scipy.stats as st

from anomalydetection.common.logging import LoggingMixin
from anomalydetection.backend.engine import BaseEngine
from anomalydetection.backend.entities.output_message import AnomalyResult
from statsmodels.robust.scale import mad


[docs]class RobustDetector(BaseEngine, LoggingMixin): def __init__(self, window=100, threshold=0.9999) -> None: """ Anomaly detection engine based in robust statistics, median and median absolute deviation. :param window: window of samples to work with :param threshold: threshold for confidence """ self._data = np.full((window, 1), fill_value=np.nan) self._median = np.nan self._mad = np.nan self.threshold = threshold def _update_buffer(self, value): self._data[:-1] = self._data[1:] self._data[-1] = value def _update_statistics(self): if not np.isnan(self._data).any(): self._median = np.median(self._data) self._mad = mad(self._data)[0] def _update(self, value): self._update_buffer(value=value) self._update_statistics()
[docs] def predict(self, value: float, **kwargs) -> AnomalyResult: results = {} if np.isnan(self._data).any(): results['anomaly_probability'] = -1 results['is_anomaly'] = -1 results['value_upper_limit'] = -1 results['value_lower_limit'] = -1 else: if self._mad != 0: z_score = np.abs(value - self._median) / self._mad else: z_score = np.inf results['anomaly_probability'] = 1 - st.norm.sf(z_score) results['is_anomaly'] = int(results['anomaly_probability'] >= self.threshold) results['value_upper_limit'] = \ (self._median + self._mad*st.norm.ppf(self.threshold)) results['value_lower_limit'] = \ (self._median - self._mad*st.norm.ppf(self.threshold)) self._update(value=value) return AnomalyResult(**results)