
Commit fd74bba

added files that ran in the Ubuntu VM
1 parent 255cf13 commit fd74bba

File tree

6 files changed
+4870 -1 lines changed

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
import numpy as np
import pandas as pd
import tflite_runtime.interpreter as tflite
from flask import Flask, jsonify, request
from prometheus_client import Gauge, generate_latest, CONTENT_TYPE_LATEST
from apscheduler.schedulers.background import BackgroundScheduler


app = Flask(__name__)


# Load the TFLite autoencoder and cache its tensor details
model_path = "/home/marco/autoencoder_model.tflite"
interpreter = tflite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()


# Gauge exported on /metrics for Prometheus to scrape
anomaly_gauge = Gauge("anomaly_score", "Anomaly score from autoencoder")


# Reconstruction-error threshold above which a sample is flagged as anomalous
threshold = 0.1998


def update_anomaly_metric():
    """Score the newest row of the scaled CSV and refresh the gauge."""
    try:
        data_path = "/home/marco/processed_prometheus_data_scaled.csv"
        df = pd.read_csv(data_path)

        latest_data = df.drop(columns=["timestamp"]).iloc[-1].values.astype(np.float32)
        input_data = np.expand_dims(latest_data, axis=0)
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        reconstructed = interpreter.get_tensor(output_details[0]['index'])[0]
        mse = np.mean((latest_data - reconstructed) ** 2)
        anomaly_gauge.set(mse)
        print(f"Updated anomaly metric: {mse:.5f}")
    except Exception as e:
        print(f"Error updating anomaly metric: {e}")


# Refresh the gauge every 15 seconds in the background
scheduler = BackgroundScheduler(daemon=True)
scheduler.add_job(update_anomaly_metric, 'interval', seconds=15)
scheduler.start()


@app.route("/predict", methods=["GET"])
def predict():
    try:
        data_path = "/home/marco/processed_prometheus_data_scaled.csv"
        df = pd.read_csv(data_path)

        latest_data = df.drop(columns=["timestamp"]).iloc[-1].values.astype(np.float32)
        input_data = np.expand_dims(latest_data, axis=0)

        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        reconstructed = interpreter.get_tensor(output_details[0]['index'])[0]

        mse = np.mean((latest_data - reconstructed) ** 2)
        anomaly_detected = mse > threshold

        anomaly_gauge.set(mse)

        return jsonify({
            "reconstruction_error": float(mse),
            "anomaly_detected": bool(anomaly_detected)
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route("/metrics", methods=["GET"])
def metrics():
    return generate_latest(), 200, {'Content-Type': CONTENT_TYPE_LATEST}


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000, debug=False)
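For context, a minimal sketch of how the two endpoints could be exercised once this service is running on port 5000. The localhost URL and the requests dependency are assumptions for illustration, not part of the commit:

import requests

# Ask the service to score the latest row of the scaled CSV (assumed local URL).
resp = requests.get("http://localhost:5000/predict", timeout=10)
print(resp.json())  # dict with "reconstruction_error" and "anomaly_detected"

# Fetch the Prometheus exposition text containing the anomaly_score gauge.
print(requests.get("http://localhost:5000/metrics", timeout=10).text)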
Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
import pandas as pd
import json
import os
import joblib
from sklearn.preprocessing import MinMaxScaler

raw_csv_path = "/home/marco/prometheus_data.csv"
processed_csv_path = "/home/marco/processed_prometheus_data.csv"
scaled_csv_path = "/home/marco/processed_prometheus_data_scaled.csv"
scaler_path = "/home/marco/scaler.pkl"


# Append to the existing raw CSV if present, otherwise start fresh
if os.path.exists(raw_csv_path):
    df = pd.read_csv(raw_csv_path)
else:
    df = pd.DataFrame()


def extract_metric(json_file):
    """Pull the latest instant-vector value out of a Prometheus API JSON dump."""
    try:
        with open(json_file) as f:
            data = json.load(f)
        if data['data']['result']:
            return float(data['data']['result'][0]['value'][1])
        return None
    except Exception:
        return None


cpu_usage = extract_metric("/home/marco/cpu_usage.json")
memory_usage = extract_metric("/home/marco/memory_usage.json")
disk_read = extract_metric("/home/marco/disk_read.json")
disk_write = extract_metric("/home/marco/disk_write.json")
network_receive = extract_metric("/home/marco/net_receive.json")
network_transmit = extract_metric("/home/marco/net_transmit.json")

new_row = {
    'timestamp': pd.Timestamp.now(),
    'cpu_usage': cpu_usage,
    'memory_usage': memory_usage,
    'disk_read': disk_read,
    'disk_write': disk_write,
    'network_receive': network_receive,
    'network_transmit': network_transmit,
}

new_row_df = pd.DataFrame([new_row])

if df.empty:
    df = new_row_df
else:
    df = pd.concat([df, new_row_df], ignore_index=True)


df['timestamp'] = pd.to_datetime(df['timestamp'])


# Rate-of-change features
df['cpu_change'] = df['cpu_usage'].diff()
df['memory_change'] = df['memory_usage'].diff()
df['disk_read_speed'] = df['disk_read'].diff()
df['disk_write_speed'] = df['disk_write'].diff()
df['network_receive_speed'] = df['network_receive'].diff()
df['network_transmit_speed'] = df['network_transmit'].diff()

# Rolling statistics
df['cpu_rolling'] = df['cpu_usage'].rolling(window=5).mean()
df['memory_std'] = df['memory_usage'].rolling(window=5).std()

# Ratio features
df['cpu_mem_ratio'] = df['cpu_usage'] / df['memory_usage']
df['disk_cpu_ratio'] = df['disk_read'] / (df['cpu_usage'] + 1)

# Simple threshold flags
df['high_cpu'] = (df['cpu_usage'] > 80).astype(int)
df['high_memory'] = (df['memory_usage'] > 80).astype(int)

# Time-of-day features
df['hour'] = df['timestamp'].dt.hour
df['day_of_week'] = df['timestamp'].dt.weekday


df.to_csv(raw_csv_path, index=False)


cleaned_df = df.copy()

cleaned_df.dropna(how="all", inplace=True)
cleaned_df.ffill(inplace=True)
cleaned_df.fillna(cleaned_df.median(numeric_only=True), inplace=True)

for col in ['disk_read', 'disk_write', 'network_receive', 'network_transmit']:
    cleaned_df[col] = cleaned_df[col].replace(0, cleaned_df[col].median())

# Keep only samples from the last seven days
valid_days = pd.date_range(end=pd.Timestamp.today().date(), periods=7).strftime("%Y-%m-%d").tolist()
cleaned_df = cleaned_df[cleaned_df['timestamp'].dt.date.astype(str).isin(valid_days)]


cleaned_df.to_csv(processed_csv_path, index=False)

print(f"Data cleaned and processed successfully! Saved to: {processed_csv_path}")
print(f"Last Timestamp in CSV: {cleaned_df['timestamp'].max()}")


# Scale all feature columns with the previously fitted scaler, if available
if os.path.exists(scaler_path):
    scaler = joblib.load(scaler_path)
    df_scaled = cleaned_df.copy()

    df_scaled_no_ts = df_scaled.drop(columns=["timestamp"])
    df_scaled_no_ts[df_scaled_no_ts.columns] = scaler.transform(df_scaled_no_ts)
    df_scaled[df_scaled_no_ts.columns] = df_scaled_no_ts

    df_scaled.to_csv(scaled_csv_path, index=False)
    print(f"Scaled data updated and saved to: {scaled_csv_path}")
else:
    print("Warning: Scaler not found.")
