Commit 30fd03aa
authored 1 month ago by Kristiyan Blagov

removed STAN.py

parent 1fc3ec66

Showing 1 changed file: STAN.py (deleted, 100644 → 0) with 0 additions and 248 deletions
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 11 22:22:47 2025

@author: Kristiyan
"""

import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#import statsmodels.api as sm
from statsmodels.tsa.stattools import acf
from scipy.signal import find_peaks
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler
import math
import os
import time
from collections import defaultdict
# The IDs of UCR Anomaly Archive time series sorted into arrays of corresponding anomaly type
amplitude_change = ["013", "014", "037", "042", "044", "053", "057", "066", "091", "100",
                    "104", "121", "122", "145", "150", "152", "161", "165", "174", "199",
                    "205", "206", "207", "215", "217"]
flat = ["045", "078", "110", "153", "186", "236"]
freq_change = ["023", "026", "032", "033", "034", "040", "048", "099", "101", "131",
               "134", "140", "141", "142", "148", "156", "202", "222", "223", "224",
               "227", "228", "229", "244", "245", "246", "247"]
local_drop = ["005", "043", "054", "063", "077", "086", "092", "102", "106", "113",
              "151", "162", "171", "185", "194", "200", "232", "233", "237", "238"]
local_peak = ["007", "021", "024", "025", "030", "049", "062", "064", "085", "089",
              "097", "115", "129", "132", "133", "138", "157", "170", "172", "193",
              "197", "234", "235", "239", "243"]
missing_drop = ["002", "072", "180"]
missing_peak = ["004", "019", "035", "036", "059", "060", "094", "112", "127", "143",
                "144", "167", "168", "248"]
noise = ["003", "008", "027", "028", "029", "039", "056", "067", "068", "083",
         "095", "098", "107", "111", "116", "135", "136", "137", "147", "164",
         "175", "176", "191"]
outlier_datasets = ["011", "012", "015", "018", "070", "071", "084", "119", "120",
                    "123", "126", "178", "179", "192", "213", "216", "220", "226"]
reverse = ["020", "022", "038", "052", "055", "065", "090", "103", "128", "130",
           "146", "160", "163", "173", "198", "201", "203", "209", "212", "225",
           "230", "242", "249"]
reverse_horizontal = ["016", "017", "058", "096", "124", "125", "166", "231"]
sampling_rate = ["050", "061", "105", "158", "169"]
signal_shift = ["204"]
smoothed_increase = ["241"]
steep_increase = ["051", "159"]
time_shift = ["069", "074", "075", "079", "080", "081", "082", "087", "088", "108",
              "177", "182", "183", "187", "188", "189", "190", "195", "196", "208"]
time_warping = ["031", "076", "139", "184"]
unusual_pattern = ["001", "006", "009", "010", "041", "046", "047", "073", "093",
                   "109", "114", "117", "118", "149", "154", "155", "181", "210",
                   "211", "214", "218", "219", "221", "240", "250"]
def count_direction_changes(subseq):
    count = 0
    direction = "unchanged"
    for i in range(1, len(subseq)):
        if subseq[i] - subseq[i - 1] > 0:
            new_direction = "up"
        elif subseq[i] - subseq[i - 1] < 0:
            new_direction = "down"
        else:
            new_direction = "unchanged"
        if new_direction != direction:
            count += 1
            direction = new_direction
    return count
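
# Quick sanity check (added illustration, not part of the original file):
# the toy sequence [1, 3, 2, 2, 4] moves up, down, flat, up, giving four
# direction changes, since the first move away from the initial
# "unchanged" state also counts as a change.
assert count_direction_changes([1, 3, 2, 2, 4]) == 4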
def point_anomaly(subseq):
    return np.std(subseq)
def highest_autocorrelation(ts, min_size=10, max_size=1000):
    acf_values = acf(ts, fft=True, nlags=int(ts.shape[0] / 2))
    peaks, _ = find_peaks(acf_values)
    peaks = peaks[np.logical_and(peaks >= min_size, peaks < max_size)]
    corrs = acf_values[peaks]
    if len(peaks) == 0:
        peaks, _ = find_peaks(acf_values)
        peaks = peaks[np.logical_and(peaks >= min_size, peaks < 2000)]
        corrs = acf_values[peaks]
        if len(peaks) == 0:
            return -1
    return peaks[np.argmax(corrs)]
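
# Illustrative sketch (added, not part of the original file): a synthetic
# sine wave with 100 samples per cycle produces its dominant ACF peak near
# lag 100, so highest_autocorrelation should recover that period.
_demo_ts = np.sin(2 * np.pi * np.arange(5000) / 100)
assert abs(highest_autocorrelation(_demo_ts) - 100) <= 2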
os.chdir(os.getcwd() + "\\UCR_Anomaly_Archive")
files = os.listdir()
time_series_list = []
for file in files:
    if file.endswith(".txt"):
        time_series_list.append(file)

ult_outlier = []
ult_correct = 0
wrong_datasets = []
wrong_factors = []
wrong_outlier_idx = []
bigger_dist = []
smaller_dist = []
to_upper_bound = []
to_lower_bound = []
turnpts = []
correct_ts = []
incorrect_ts = []
sum_stat_results = defaultdict(int)
summary_statistics = [np.min, np.max, np.mean, np.std, st.skew, st.kurtosis,
                      count_direction_changes, point_anomaly]

start_time = time.time()
for dataset in time_series_list:
    name_split = dataset.split("_")
    # To iterate over time series with a specific anomaly type:
    # "if name_split[0] in amplitude_change:", "if name_split[0] in flat:", ...
    if name_split[0]:
        data = np.array(pd.read_csv(dataset, header=None)).flatten()
        name_split[6] = name_split[6][:-4]
        y_line = np.mean([int(name_split[-1]), int(name_split[-2])])
        period_length = highest_autocorrelation(data)
        # Begin index, end index and length of the ground truth anomaly
        begin = int(name_split[-2])
        end = int(name_split[-1])
        l = end - begin + 1
        data_original = data
        data = np.diff(data)
        factor = []
        outlier_idx = []
        #powers_of_2 = [period_length]
        for f in summary_statistics:
            if f.__name__ == "count_direction_changes":
                data = data_original
            if f.__name__ == "point_anomaly":
                period_length = 3
            agg_data = []
            for j in range(len(data)):
                if j % period_length == 0 and (j + period_length) <= len(data):
                    agg_data.append(f(data[j:j + period_length]))
            # Scaling the data into the range [0, 1]
            data_to_scale = [[x] for x in agg_data]
            scaler = MinMaxScaler()
            scaled_data = scaler.fit_transform(data_to_scale)
            scaled_data = scaled_data.flatten()
            train_test_idx = math.floor(int(name_split[-3]) / period_length)
            # Set the index of either the maximal or the minimal value from
            # the test part of the transformed data as the outlier index
            if np.max(scaled_data[train_test_idx:]) - np.mean(scaled_data) >= np.mean(scaled_data) - np.min(scaled_data[train_test_idx:]):
                outlier = np.argmax(scaled_data[train_test_idx:]) + train_test_idx
            else:
                outlier = np.argmin(scaled_data[train_test_idx:]) + train_test_idx
            # Save the anomaly detection accuracy for each summary statistic
            if outlier * period_length + period_length / 2 > min(begin - l, begin - 100) and outlier * period_length + period_length / 2 < max(end + l, end + 100):
                sum_stat_results[f.__name__] += 1
            # Maximal and minimal values in the train set, used for comparison in the next steps
            sort_scaled = sorted(scaled_data[:train_test_idx])
            scaled_lower_bound = sort_scaled[0]
            scaled_upper_bound = sort_scaled[-1]
            # If the detected outlier index falls at or before the train/test
            # boundary, it is disregarded
            if outlier <= train_test_idx:
                factor.append(0)
                outlier_idx.append(0)
            # Save the index of the outlier plus its difference from the
            # max value of the train dataset
            elif scaled_data[outlier] > scaled_upper_bound:
                factor.append(scaled_data[outlier] - scaled_upper_bound)
                outlier_idx.append(outlier * period_length + period_length / 2)
            # Save the index of the outlier plus its difference from the
            # min value of the train dataset
            elif scaled_data[outlier] < scaled_lower_bound:
                factor.append(scaled_lower_bound - scaled_data[outlier])
                outlier_idx.append(outlier * period_length + period_length / 2)
            # The detected outlier is in the test dataset, but is smaller
            # than the max and bigger than the min of the train dataset
            else:
                factor.append(0)
                outlier_idx.append(outlier * period_length + period_length / 2)
            if f.__name__ == "count_direction_changes":
                data = np.diff(data)
            if f.__name__ == "point_anomaly":
                period_length = highest_autocorrelation(data)
        # Pick the anomaly index with the largest factor
        max_fact = np.argmax(factor)
        ult_outlier.append(outlier_idx[max_fact])
        # Check if the anomaly index is within the range of the ground truth
        if outlier_idx[max_fact] >= min(begin - l, begin - 100) and outlier_idx[max_fact] <= max(end + l, end + 100):
            ult_correct += 1
        # The commented lines below were used for experimenting
        #to_lower_bound.append(outlier_idx[max_fact] - min(begin-l,begin-100))
        #to_upper_bound.append(max(end+l,end+100) - outlier_idx[max_fact])
        #correct_ts.append(name_split[0])
        #elif outlier_idx[max_fact] < min(begin-l,begin-100):
        #    smaller_dist.append(min(begin-l,begin-100) - outlier_idx[max_fact])
        #    incorrect_ts.append(name_split[0])
        #elif outlier_idx[max_fact] > max(end+l,end+100):
        #    bigger_dist.append(outlier_idx[max_fact] - max(end+l,end+100))
        #    incorrect_ts.append(name_split[0])
        #else:
        #    wrong_factors.append(factor)
        #    wrong_outlier_idx.append(outlier_idx)
        #    wrong_datasets.append(name_split[0])
        #    incorrect_ts.append(name_split[0])
end_time = time.time()
runtime = end_time - start_time
print("Total Runtime:", runtime, "seconds")
print("Runtime per Dataset:", runtime / 250, "seconds")
print("Correctly detected anomalies:", ult_correct, "out of 250")
#print(correct_ts)
#print(incorrect_ts)
print("Correctly detected anomalies per summary statistic:")
print(sum_stat_results)
#print(factor)
#print(outlier_idx)
#print(ult_outlier)
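
For reference, the core detection step of the deleted script (aggregate a summary statistic over non-overlapping windows, min-max scale the aggregates, and flag the most extreme value in the test region) can be exercised in isolation. A minimal sketch, assuming a NumPy array ts, a window length w, and a train/test split index split; these names and the helper itself are hypothetical, not from the original file:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

def detect_extreme_window(ts, w, split, stat=np.std):
    # Aggregate the chosen statistic over non-overlapping windows of length w,
    # mirroring the "j % period_length == 0" loop in the original script.
    agg = np.array([stat(ts[j:j + w]) for j in range(0, len(ts) - w + 1, w)])
    # Scale the aggregates into [0, 1], as the original script does.
    scaled = MinMaxScaler().fit_transform(agg.reshape(-1, 1)).flatten()
    # In the test region, pick whichever of max/min deviates more from the mean.
    test = scaled[split:]
    if np.max(test) - scaled.mean() >= scaled.mean() - np.min(test):
        win = np.argmax(test) + split
    else:
        win = np.argmin(test) + split
    # Return the window's approximate midpoint in original-series coordinates.
    return win * w + w / 2

The original script runs this step once per summary statistic and then keeps the candidate whose scaled value exceeds the train-set bounds by the largest margin (the "factor").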