# Source code for anomalydetection.backend.engine.robust_z_engine

# -*- coding:utf-8 -*- #
#
# Anomaly Detection Framework
# Copyright (C) 2018 Bluekiri BigData Team <bigdata@bluekiri.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import numpy as np
import scipy.stats as st

from anomalydetection.common.logging import LoggingMixin
from anomalydetection.backend.engine import BaseEngine
from anomalydetection.backend.entities.output_message import AnomalyResult
from statsmodels.robust.scale import mad


class RobustDetector(BaseEngine, LoggingMixin):
    """
    Streaming anomaly detector built on robust statistics.

    Keeps a fixed-size sliding window of the most recent values and scores
    each new value by its absolute distance from the window median, divided
    by the window's median absolute deviation (MAD).
    """

    def __init__(self, window=100, threshold=0.9999) -> None:
        """
        Anomaly detection engine based on robust statistics,
        median and median absolute deviation.

        :param window:     window of samples to work with
        :param threshold:  threshold for confidence
        """
        # The buffer starts full of NaN; a NaN anywhere in it means the
        # window has not been completely filled yet (warm-up phase).
        self._data = np.full((window, 1), fill_value=np.nan)
        self._median = np.nan
        self._mad = np.nan
        self.threshold = threshold

    def _update_buffer(self, value):
        """Shift the window one slot towards the front and store ``value`` last."""
        self._data[:-1] = self._data[1:]
        self._data[-1] = value

    def _update_statistics(self):
        """Recompute the median and MAD, but only once the window has no NaN."""
        if not np.isnan(self._data).any():
            self._median = np.median(self._data)
            # The buffer has a single column; index the result with [0]
            # to keep a scalar rather than an array.
            self._mad = mad(self._data)[0]

    def _update(self, value):
        """Push ``value`` into the window and refresh the cached statistics."""
        self._update_buffer(value=value)
        self._update_statistics()

    def predict(self, value: float, **kwargs) -> AnomalyResult:
        """
        Score ``value`` against the current window, then add it to the window.

        While the window still contains NaN, every result field is set
        to -1. Otherwise the z-score ``|value - median| / mad`` is mapped
        to a probability through the standard normal distribution and
        compared with ``self.threshold``.

        :param value:   new observation to evaluate
        :param kwargs:  accepted but not used by this engine
        :return:        AnomalyResult built from ``anomaly_probability``,
                        ``is_anomaly``, ``value_upper_limit`` and
                        ``value_lower_limit``
        """
        results = {}
        if np.isnan(self._data).any():
            results['anomaly_probability'] = -1
            results['is_anomaly'] = -1
            results['value_upper_limit'] = -1
            results['value_lower_limit'] = -1
        else:
            deviation = np.abs(value - self._median)
            if self._mad != 0:
                z_score = deviation / self._mad
            elif deviation == 0:
                # Window has no spread and the value matches its median:
                # the value lies on the reported limits, so it must not be
                # scored as infinitely far away.
                z_score = 0.0
            else:
                # No spread in the window: any departure from the median
                # is treated as maximally anomalous.
                z_score = np.inf
            results['anomaly_probability'] = 1 - st.norm.sf(z_score)
            results['is_anomaly'] = int(results['anomaly_probability'] >= self.threshold)
            # Half-width of the accepted band around the median; computed
            # once and applied symmetrically.
            margin = self._mad*st.norm.ppf(self.threshold)
            results['value_upper_limit'] = self._median + margin
            results['value_lower_limit'] = self._median - margin
        # The value is scored against the window *before* being added to it.
        self._update(value=value)

        return AnomalyResult(**results)