Commit 30fd03aa authored by Kristiyan Blagov

removed STAN.py

parent 1fc3ec66
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 11 22:22:47 2025
@author: Kristiyan
"""
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#import statsmodels.api as sm
from statsmodels.tsa.stattools import acf
from scipy.signal import find_peaks
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler
import math
import os
import time
from collections import defaultdict
# The IDs of the UCR Anomaly Archive time series, grouped into lists by anomaly type
amplitude_change = ["013","014", "037", "042", "044", "053", "057", "066", "091", "100", "104", "121", "122", "145", "150", "152", "161", "165", "174", "199", "205", '206', '207', "215", "217"]
flat = ['045', '078', '110', '153', '186', '236']
freq_change = ['023', '026', '032', '033', '034', '040', '048', '099', '101', '131', '134', '140', '141', '142', '148', '156', '202', '222', '223', '224', '227', '228', '229', '244', '245', "246", '247']
local_drop = ['005', '043', '054', '063', '077', '086', '092', '102', '106', '113', '151', '162', '171', '185', '194', '200', '232', '233', '237', '238']
local_peak = ['007', '021', '024', '025', '030', '049', '062', '064', '085', '089', '097', '115', '129', '132', '133', '138', '157', '170', '172', '193', '197', '234', '235', '239', '243']
missing_drop = ['002', '072', '180']
missing_peak = ['004', '019', '035', '036', '059', '060', '094', '112', '127', '143', '144', '167', '168', '248']
noise = ['003', '008', '027', '028', '029', '039', '056', '067', '068', '083', '095', '098', '107', '111', '116', '135', '136', '137', '147', '164', '175', '176', '191']
outlier_datasets = ['011', '012', '015', '018', '070', '071', '084', '119', '120', '123', '126', '178', '179', '192', '213', '216', '220', '226']
reverse = ['020', '022', '038', '052', '055', '065', '090', '103', '128', '130', '146', '160', '163', '173', '198', '201', '203', '209', '212', '225', '230', '242', '249']
reverse_horizontal = ['016', '017', '058', '096', '124', '125', '166', '231']
sampling_rate = ['050', '061', '105', '158', '169']
signal_shift = ['204']
smoothed_increase = ['241']
steep_increase = ['051', '159']
time_shift = ['069', '074', '075', '079', '080', '081', '082', '087', '088', '108', '177', '182', '183', '187', '188', '189', '190', '195', '196', '208']
time_warping = ['031', '076', '139', '184']
unusual_pattern = ['001', '006', '009', '010', '041', '046', '047', '073', '093', '109', '114', '117', '118', '149', '154', '155', '181', '210', '211', '214', '218', '219', '221', '240', '250']
def count_direction_changes(subseq):
    count = 0
    direction = "unchanged"
    for i in range(1, len(subseq)):
        if subseq[i] - subseq[i-1] > 0:
            new_direction = "up"
        elif subseq[i] - subseq[i-1] < 0:
            new_direction = "down"
        else:
            new_direction = "unchanged"
        if new_direction != direction:
            count += 1
            direction = new_direction
    return count
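# Worked example: count_direction_changes([1, 2, 1, 1]) == 3
# (start -> "up", then "up" -> "down", then "down" -> "unchanged").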
def point_anomaly(subseq):
    return np.std(subseq)
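# Estimate the dominant period of a series as the lag of the strongest ACF
# peak in [min_size, max_size); if no peak is found there, the search widens
# to lag 2000, and -1 is returned when no peak exists at all.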
def highest_autocorrelation(ts, min_size=10, max_size=1000):
    acf_values = acf(ts, fft=True, nlags=int(ts.shape[0]/2))
    peaks, _ = find_peaks(acf_values)
    peaks = peaks[np.logical_and(peaks >= min_size, peaks < max_size)]
    corrs = acf_values[peaks]
    if len(peaks) == 0:
        peaks, _ = find_peaks(acf_values)
        peaks = peaks[np.logical_and(peaks >= min_size, peaks < 2000)]
        corrs = acf_values[peaks]
        if len(peaks) == 0:
            return -1
    return peaks[np.argmax(corrs)]
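# The script expects a folder named "UCR_Anomaly_Archive" with the archive's
# .txt files directly inside the current working directory.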
os.chdir(os.path.join(os.getcwd(), "UCR_Anomaly_Archive"))
files = os.listdir()
time_series_list = []
for file in files:
    if file.endswith(".txt"):
        time_series_list.append(file)
ult_outlier = []
ult_correct = 0
wrong_datasets = []
wrong_factors = []
wrong_outlier_idx = []
bigger_dist = []
smaller_dist = []
to_upper_bound = []
to_lower_bound = []
turnpts = []
correct_ts = []
incorrect_ts = []
sum_stat_results = defaultdict(int)
summary_statistics = [np.min, np.max, np.mean, np.std, st.skew, st.kurtosis, count_direction_changes, point_anomaly]
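# Eight summary statistics are applied per window: min/max/mean/std/skew/
# kurtosis run on the differenced series, while count_direction_changes uses
# the raw series and point_anomaly uses a fixed window of 3 (see the loop).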
start_time = time.time()
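# UCR file names follow the pattern
# <ID>_UCR_Anomaly_<name>_<train_end>_<anomaly_begin>_<anomaly_end>.txt,
# so the last three underscore-separated fields carry the train/test split
# index and the ground-truth anomaly location.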
for dataset in time_series_list:
    name_split = dataset.split("_")
    # To iterate over time series with a specific anomaly type only, replace
    # the condition below with e.g. "if name_split[0] in amplitude_change:",
    # "if name_split[0] in flat:", ...
    if name_split[0]:
        data = np.array(pd.read_csv(dataset, header=None)).flatten()
        # Strip the ".txt" extension from the last filename token
        name_split[6] = name_split[6][:-4]
        y_line = np.mean([int(name_split[-1]), int(name_split[-2])])
        period_length = highest_autocorrelation(data)
        # Begin index, end index and length of the ground-truth anomaly
        begin = int(name_split[-2])
        end = int(name_split[-1])
        l = end - begin + 1
        data_original = data
        data = np.diff(data)
        factor = []
        outlier_idx = []
        #powers_of_2 = [period_length]
        for f in summary_statistics:
            # count_direction_changes runs on the raw (undifferenced) series
            if f.__name__ == "count_direction_changes":
                data = data_original
            # point_anomaly uses a fixed window of 3 instead of the period
            if f.__name__ == "point_anomaly":
                period_length = 3
            agg_data = []
            for j in range(len(data)):
                if j % period_length == 0 and (j + period_length) <= len(data):
                    agg_data.append(f(data[j:j+period_length]))
            # Scale the aggregated data to the range [0, 1]
            data_to_scale = [[x] for x in agg_data]
            scaler = MinMaxScaler()
            scaled_data = scaler.fit_transform(data_to_scale)
            scaled_data = scaled_data.flatten()
            # Index separating the train and test parts of the aggregated series
            train_test_idx = math.floor(int(name_split[-3]) / period_length)
            # Set the index of either the maximal or the minimal value from
            # the test part of the transformed data as the outlier index
            if np.max(scaled_data[train_test_idx:]) - np.mean(scaled_data) >= np.mean(scaled_data) - np.min(scaled_data[train_test_idx:]):
                outlier = np.argmax(scaled_data[train_test_idx:]) + train_test_idx
            else:
                outlier = np.argmin(scaled_data[train_test_idx:]) + train_test_idx
            # Record the anomaly detection accuracy for each summary statistic
            if outlier*period_length + period_length/2 > min(begin-l, begin-100) and outlier*period_length + period_length/2 < max(end+l, end+100):
                sum_stat_results[f.__name__] += 1
            # Maximal and minimal values in the train set, used for comparison below
            sort_scaled = sorted(scaled_data[:train_test_idx])
            scaled_lower_bound = sort_scaled[0]
            scaled_upper_bound = sort_scaled[-1]
            # If the detected outlier index falls within the train dataset,
            # it is disregarded
            if outlier <= train_test_idx:
                factor.append(0)
                outlier_idx.append(0)
            # Save the index of the outlier plus its difference with the
            # maximum of the train dataset
            elif scaled_data[outlier] > scaled_upper_bound:
                factor.append(scaled_data[outlier] - scaled_upper_bound)
                outlier_idx.append(outlier*period_length + period_length/2)
            # Save the index of the outlier plus its difference with the
            # minimum of the train dataset
            elif scaled_data[outlier] < scaled_lower_bound:
                factor.append(scaled_lower_bound - scaled_data[outlier])
                outlier_idx.append(outlier*period_length + period_length/2)
            # The detected outlier is in the test dataset, but is smaller
            # than the max and bigger than the min of the train dataset
            else:
                factor.append(0)
                outlier_idx.append(outlier*period_length + period_length/2)
            # Restore the differenced series and the period for the next statistic
            if f.__name__ == "count_direction_changes":
                data = np.diff(data)
            if f.__name__ == "point_anomaly":
                period_length = highest_autocorrelation(data)
        # Pick the anomaly index with the largest factor
        max_fact = np.argmax(factor)
        ult_outlier.append(outlier_idx[max_fact])
        # Check if the anomaly index is within the range of the ground truth
        if outlier_idx[max_fact] >= min(begin-l, begin-100) and outlier_idx[max_fact] <= max(end+l, end+100):
            ult_correct += 1
        # The commented lines below were used for experimenting
        #to_lower_bound.append(outlier_idx[max_fact] - min(begin-l,begin-100))
        #to_upper_bound.append(max(end+l,end+100) - outlier_idx[max_fact])
        #correct_ts.append(name_split[0])
        #elif outlier_idx[max_fact] < min(begin-l,begin-100):
        #    smaller_dist.append(min(begin-l,begin-100) - outlier_idx[max_fact])
        #    incorrect_ts.append(name_split[0])
        #elif outlier_idx[max_fact] > max(end+l,end+100):
        #    bigger_dist.append(outlier_idx[max_fact] - max(end+l,end+100))
        #    incorrect_ts.append(name_split[0])
        #else:
        #    wrong_factors.append(factor)
        #    wrong_outlier_idx.append(outlier_idx)
        #    wrong_datasets.append(name_split[0])
        #    incorrect_ts.append(name_split[0])
end_time = time.time()
runtime = end_time - start_time
print("Total Runtime:", runtime, "seconds")
print("Runtime per Dataset:", runtime/250, "seconds")
print("Correctly detected anomalies:", ult_correct, "out of 250")
#print(correct_ts)
#print(incorrect_ts)
print("Correctly detected anomalies per summary statistic:")
print(sum_stat_results)
#print(factor)
#print(outlier_idx)
#print(ult_outlier)