Anpassung von Prometheus, Grafana und Backend auf Anomalieerkennung.
All checks were successful
release-tag / release-image (push) Successful in 2m20s
All checks were successful
release-tag / release-image (push) Successful in 2m20s
This commit is contained in:
@@ -3,74 +3,743 @@
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Active Agents",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "eventcollector_active_agents",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "green", "value": 1 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"orientation": "auto",
|
||||
"textMode": "auto",
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "High Detections (5m)",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
|
||||
"title": "Events/s",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "increase(eventcollector_detection_hits_total{severity=\"high\"}[5m])",
|
||||
"expr": "sum(rate(eventcollector_ingest_events_total{channel=~\"$channel\",event_id=~\"$event_id\"}[5m]))",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "eps",
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"textMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "High Detections 5m",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(eventcollector_detection_hits_total{severity=\"high\",rule=~\"$rule\"}[5m]))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "red", "value": 1 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"textMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Baseline Max Z-Score",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(eventcollector_anomaly_score{host=~\"$host\",rule=\"baseline_event_rate_anomaly\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"decimals": 2,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "orange", "value": 3 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"textMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Rule Errors 5m",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(eventcollector_rule_errors_total{rule=~\"$rule\"}[5m]))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "red", "value": 1 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"textMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "DB Insert Failures 5m",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "increase(eventcollector_db_insert_failures_total[5m])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "red", "value": 1 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"textMode": "auto"
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "HTTP Requests",
|
||||
"title": "Ingested Events / Second by Channel",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(eventcollector_http_requests_total[5m])",
|
||||
"legendFormat": "{{path}} {{status}}",
|
||||
"expr": "sum by (channel) (rate(eventcollector_ingest_events_total{channel=~\"$channel\",event_id=~\"$event_id\"}[5m]))",
|
||||
"legendFormat": "{{channel}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "eps",
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Detection Hits",
|
||||
"title": "Detection Hits by Rule / Severity",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "increase(eventcollector_detection_hits_total[5m])",
|
||||
"legendFormat": "{{rule}} {{severity}}",
|
||||
"expr": "sum by (rule,severity) (increase(eventcollector_detection_hits_total{rule=~\"$rule\",severity=~\"$severity\"}[5m]))",
|
||||
"legendFormat": "{{rule}} / {{severity}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Baseline: Current Count vs Average",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "eventcollector_baseline_current_count{host=~\"$host\",channel=~\"$channel\",event_id=~\"$event_id\"}",
|
||||
"legendFormat": "current {{host}} {{channel}} {{event_id}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "eventcollector_baseline_avg_count{host=~\"$host\",channel=~\"$channel\",event_id=~\"$event_id\"}",
|
||||
"legendFormat": "avg {{host}} {{channel}} {{event_id}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Ingested Events",
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 12 },
|
||||
"title": "Baseline Z-Score",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(eventcollector_ingest_events_total[5m])",
|
||||
"legendFormat": "{{channel}} {{event_id}}",
|
||||
"expr": "eventcollector_anomaly_score{host=~\"$host\",rule=\"baseline_event_rate_anomaly\"}",
|
||||
"legendFormat": "{{host}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"decimals": 2,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "orange", "value": 3 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "bargauge",
|
||||
"title": "Top Baseline Z-Scores",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 20 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, eventcollector_anomaly_score{host=~\"$host\",rule=\"baseline_event_rate_anomaly\"})",
|
||||
"legendFormat": "{{host}}",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"decimals": 2,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "orange", "value": 3 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "bargauge",
|
||||
"title": "Top EventIDs by Ingest Rate",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 20 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(15, sum by (channel,event_id) (rate(eventcollector_ingest_events_total{channel=~\"$channel\",event_id=~\"$event_id\"}[5m])))",
|
||||
"legendFormat": "{{channel}} / {{event_id}}",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "eps",
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "bargauge",
|
||||
"title": "Top Detection Rules 1h",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 20 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(15, sum by (rule,severity) (increase(eventcollector_detection_hits_total{rule=~\"$rule\",severity=~\"$severity\"}[1h])))",
|
||||
"legendFormat": "{{rule}} / {{severity}}",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "HTTP Requests by Path / Status",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 28 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (path,status) (rate(eventcollector_http_requests_total[5m]))",
|
||||
"legendFormat": "{{path}} {{status}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "reqps",
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "HTTP Latency p95",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 28 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum by (le,path) (rate(eventcollector_http_request_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "{{path}} p95",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"decimals": 3
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "DB Insert Transaction Latency p95",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 36 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(eventcollector_db_tx_duration_seconds_bucket[5m])))",
|
||||
"legendFormat": "db tx p95",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"decimals": 3
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "DB Batch Size p95",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 36 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(eventcollector_db_batch_size_bucket[5m])))",
|
||||
"legendFormat": "batch size p95",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "table",
|
||||
"title": "Agent Last Seen",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 44 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "time() - eventcollector_agent_last_seen_unixtime{host=~\"$host\"}",
|
||||
"legendFormat": "{{host}}",
|
||||
"refId": "A",
|
||||
"instant": true,
|
||||
"format": "table"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "table",
|
||||
"title": "Baseline Samples",
|
||||
"datasource": "$datasource",
|
||||
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 44 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "eventcollector_baseline_sample_count{host=~\"$host\",channel=~\"$channel\",event_id=~\"$event_id\"}",
|
||||
"legendFormat": "{{host}} {{channel}} {{event_id}}",
|
||||
"refId": "A",
|
||||
"instant": true,
|
||||
"format": "table"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true
|
||||
}
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": ["siem"],
|
||||
"templating": { "list": [] },
|
||||
"tags": ["siem", "baseline", "ad"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "datasource",
|
||||
"type": "datasource",
|
||||
"query": "prometheus",
|
||||
"current": {},
|
||||
"hide": 0,
|
||||
"label": "Datasource"
|
||||
},
|
||||
{
|
||||
"name": "host",
|
||||
"type": "query",
|
||||
"datasource": "$datasource",
|
||||
"query": "label_values(eventcollector_agent_last_seen_unixtime, host)",
|
||||
"refresh": 1,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"label": "Host"
|
||||
},
|
||||
{
|
||||
"name": "channel",
|
||||
"type": "query",
|
||||
"datasource": "$datasource",
|
||||
"query": "label_values(eventcollector_ingest_events_total, channel)",
|
||||
"refresh": 1,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"label": "Channel"
|
||||
},
|
||||
{
|
||||
"name": "event_id",
|
||||
"type": "query",
|
||||
"datasource": "$datasource",
|
||||
"query": "label_values(eventcollector_ingest_events_total, event_id)",
|
||||
"refresh": 1,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"label": "Event ID"
|
||||
},
|
||||
{
|
||||
"name": "rule",
|
||||
"type": "query",
|
||||
"datasource": "$datasource",
|
||||
"query": "label_values(eventcollector_detection_hits_total, rule)",
|
||||
"refresh": 1,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"label": "Rule"
|
||||
},
|
||||
{
|
||||
"name": "severity",
|
||||
"type": "custom",
|
||||
"query": "low,medium,high",
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"label": "Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"title": "SIEM Overview",
|
||||
"timezone": "browser",
|
||||
"title": "SIEM Overview Extended",
|
||||
"uid": "siem-overview-extended",
|
||||
"version": 1
|
||||
}
|
||||
@@ -89,14 +89,14 @@ CREATE TABLE IF NOT EXISTS detections (
|
||||
|
||||
CREATE TABLE detection_rules (
|
||||
id BIGINT AUTO_INCREMENT PRIMARY KEY,
|
||||
name VARCHAR(128) NOT NULL UNIQUE,
|
||||
name VARCHAR(255) NOT NULL UNIQUE,
|
||||
description TEXT,
|
||||
severity VARCHAR(16) NOT NULL DEFAULT 'medium',
|
||||
|
||||
channel VARCHAR(64) NOT NULL DEFAULT 'Security',
|
||||
channel VARCHAR(255) NOT NULL DEFAULT 'Security',
|
||||
event_ids VARCHAR(255) NOT NULL,
|
||||
|
||||
match_field VARCHAR(64) DEFAULT '',
|
||||
match_field VARCHAR(255) DEFAULT '',
|
||||
match_operator VARCHAR(16) DEFAULT '',
|
||||
match_value TEXT,
|
||||
|
||||
@@ -1312,4 +1312,42 @@ ALTER TABLE detection_rules
|
||||
MODIFY description TEXT NULL,
|
||||
MODIFY match_value TEXT NULL,
|
||||
MODIFY match_field VARCHAR(64) NOT NULL DEFAULT '',
|
||||
MODIFY match_operator VARCHAR(16) NOT NULL DEFAULT '';
|
||||
MODIFY match_operator VARCHAR(16) NOT NULL DEFAULT '';
|
||||
|
||||
|
||||
|
||||
-- baseline_event_stats: aggregate event-count statistics.
-- The uniq_baseline_event key allows at most one row per combination of
-- (hostname, channel_name, event_id, hour_of_day, day_of_week).
CREATE TABLE baseline_event_stats (
|
||||
id BIGINT AUTO_INCREMENT PRIMARY KEY,
|
||||
|
||||
hostname VARCHAR(255) NOT NULL,
|
||||
channel_name VARCHAR(255) NOT NULL,
|
||||
event_id INT NOT NULL,
|
||||
|
||||
hour_of_day TINYINT NOT NULL,
|
||||
day_of_week TINYINT NOT NULL,
|
||||
|
||||
avg_count DOUBLE NOT NULL DEFAULT 0,
|
||||
m2_count DOUBLE NOT NULL DEFAULT 0, -- NOTE(review): presumably the running sum of squared deviations behind stddev_count; confirm against the updater code
|
||||
stddev_count DOUBLE NOT NULL DEFAULT 0,
|
||||
sample_count INT NOT NULL DEFAULT 0,
|
||||
|
||||
last_updated TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) ON UPDATE CURRENT_TIMESTAMP(6), -- set on insert and refreshed by the server on every row update
|
||||
|
||||
UNIQUE KEY uniq_baseline_event (
|
||||
hostname,
|
||||
channel_name,
|
||||
event_id,
|
||||
hour_of_day,
|
||||
day_of_week
|
||||
)
|
||||
);
|
||||
|
||||
-- Secondary index over the same five leading columns as uniq_baseline_event,
-- with sample_count appended as a sixth column.
-- NOTE(review): the unique key already serves lookups on the first five
-- columns; verify that appending sample_count justifies a second index.
CREATE INDEX idx_baseline_event_lookup
|
||||
ON baseline_event_stats (
|
||||
hostname,
|
||||
channel_name,
|
||||
event_id,
|
||||
hour_of_day,
|
||||
day_of_week,
|
||||
sample_count
|
||||
);
|
||||
@@ -1,5 +1,5 @@
|
||||
groups:
|
||||
- name: siem-backend
|
||||
- name: siem-backend-availability
|
||||
rules:
|
||||
- alert: SiemBackendDown
|
||||
expr: up{job="siem-backend"} == 0
|
||||
@@ -10,6 +10,26 @@ groups:
|
||||
summary: "SIEM backend nicht erreichbar"
|
||||
description: "Prometheus kann das SIEM-Backend seit mindestens 2 Minuten nicht scrapen."
|
||||
|
||||
- alert: SiemNoIngestEvents
|
||||
expr: sum(rate(eventcollector_ingest_events_total[15m])) == 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Keine eingehenden SIEM Events"
|
||||
description: "Seit mindestens 15 Minuten wurden keine Events mehr ingestiert."
|
||||
|
||||
- alert: SiemTooFewActiveAgents
|
||||
expr: eventcollector_active_agents < 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Zu wenige aktive Agents"
|
||||
description: "Es wurden weniger aktive Agents erkannt als erwartet."
|
||||
|
||||
- name: siem-backend-detections
|
||||
rules:
|
||||
- alert: SiemHighDetections
|
||||
expr: increase(eventcollector_detection_hits_total{severity="high"}[5m]) > 0
|
||||
for: 1m
|
||||
@@ -19,6 +39,33 @@ groups:
|
||||
summary: "Neue High-Severity Detection"
|
||||
description: "Es wurde mindestens eine neue High-Severity-Detection in den letzten 5 Minuten erzeugt."
|
||||
|
||||
- alert: SiemManyMediumDetections
|
||||
expr: sum(increase(eventcollector_detection_hits_total{severity="medium"}[15m])) > 10
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Viele Medium-Detections"
|
||||
description: "Es wurden mehr als 10 Medium-Detections in 15 Minuten erzeugt."
|
||||
|
||||
- alert: SiemBaselineHighAnomaly
|
||||
expr: eventcollector_anomaly_score{rule="baseline_event_rate_anomaly"} >= 5
|
||||
for: 2m
|
||||
labels:
|
||||
severity: high
|
||||
annotations:
|
||||
summary: "Hohe Baseline-Anomalie"
|
||||
description: "Host {{ $labels.host }} hat einen hohen Baseline-Z-Score: {{ $value }}."
|
||||
|
||||
- alert: SiemBaselineMediumAnomaly
|
||||
expr: eventcollector_anomaly_score{rule="baseline_event_rate_anomaly"} >= 3
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Baseline-Anomalie"
|
||||
description: "Host {{ $labels.host }} hat einen erhöhten Baseline-Z-Score: {{ $value }}."
|
||||
|
||||
- alert: SiemRuleErrors
|
||||
expr: increase(eventcollector_rule_errors_total[5m]) > 0
|
||||
for: 1m
|
||||
@@ -28,11 +75,51 @@ groups:
|
||||
summary: "Fehler in Detection-Regeln"
|
||||
description: "Mindestens eine Detection-Regel hat in den letzten 5 Minuten einen Fehler erzeugt."
|
||||
|
||||
- alert: SiemTooFewActiveAgents
|
||||
expr: eventcollector_active_agents < 1
|
||||
- name: siem-backend-ingest
|
||||
rules:
|
||||
- alert: SiemIngestRejected
|
||||
expr: sum(increase(eventcollector_ingest_rejected_total[5m])) > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Ingest Requests abgelehnt"
|
||||
description: "In den letzten 5 Minuten wurden Ingest Requests abgelehnt."
|
||||
|
||||
- alert: SiemDBInsertFailures
|
||||
expr: increase(eventcollector_db_insert_failures_total[5m]) > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: high
|
||||
annotations:
|
||||
summary: "DB Insert Fehler"
|
||||
description: "Das SIEM-Backend konnte Events nicht in die Datenbank schreiben."
|
||||
|
||||
- alert: SiemHighIngestRate
|
||||
expr: sum(rate(eventcollector_ingest_events_total[5m])) > 500
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Zu wenige aktive Agents"
|
||||
description: "Es wurden weniger aktive Agents erkannt als erwartet."
|
||||
summary: "Sehr hohe Eventrate"
|
||||
description: "Die Eventrate liegt seit 5 Minuten über 500 Events/s."
|
||||
|
||||
- name: siem-backend-baseline
|
||||
rules:
|
||||
- alert: SiemBaselineNotEnoughSamples
|
||||
expr: eventcollector_baseline_sample_count > 0 and eventcollector_baseline_sample_count < 24
|
||||
for: 30m
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: "Baseline lernt noch"
|
||||
description: "Für {{ $labels.host }} / {{ $labels.channel }} / {{ $labels.event_id }} gibt es erst {{ $value }} Samples."
|
||||
|
||||
- alert: SiemBaselineCurrentFarAboveAverage
|
||||
expr: eventcollector_baseline_avg_count > 0 and (eventcollector_baseline_current_count / eventcollector_baseline_avg_count) > 10
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Eventrate deutlich über Baseline"
|
||||
description: "{{ $labels.host }} / {{ $labels.channel }} / {{ $labels.event_id }} liegt mehr als 10x über Durchschnitt."
|
||||
16
dot_env
16
dot_env
@@ -36,4 +36,18 @@ MARIADB_ROOT_PASSWORD=ROOTPASSWORT
|
||||
GRAFANA_ADMIN_USER=admin
|
||||
GRAFANA_ADMIN_PASSWORD=admin
|
||||
|
||||
ENROLLMENT_KEY=BITTE_SEHR_LANG_UND_ZUFAELLIG
|
||||
ENROLLMENT_KEY=BITTE_SEHR_LANG_UND_ZUFAELLIG
|
||||
|
||||
BASELINE_ENABLED=true
|
||||
BASELINE_WINDOW=5m
|
||||
BASELINE_MIN_SAMPLES=24
|
||||
BASELINE_MIN_COUNT=10
|
||||
BASELINE_MEDIUM_Z=2.5
|
||||
BASELINE_HIGH_Z=4.0
|
||||
BASELINE_SUPPRESS_FOR=1h
|
||||
|
||||
|
||||
#BASELINE_MIN_SAMPLES=84
|
||||
#BASELINE_MEDIUM_Z=3.0
|
||||
#BASELINE_HIGH_Z=5.0
|
||||
#BASELINE_MIN_COUNT=20
|
||||
662
main.go
662
main.go
@@ -85,6 +85,7 @@ const uiTemplates = `
|
||||
<a href="/ui">Dashboard</a>
|
||||
<a href="/ui/agents">Agents</a>
|
||||
<a href="/ui/rules">Rules</a>
|
||||
<a href="/ui/baseline">Baseline</a>
|
||||
<a href="/ui/detections">Detections</a>
|
||||
<a href="/ui/events">Events</a>
|
||||
<a href="/metrics">Metrics</a>
|
||||
@@ -246,6 +247,53 @@ const uiTemplates = `
|
||||
{{template "footer" .}}
|
||||
{{end}}
|
||||
|
||||
{{define "baseline"}}
|
||||
{{template "header" .}}
|
||||
<h1>{{.Title}}</h1>
|
||||
<p class="muted">Baseline-Anomalien aus der Regel <strong>baseline_event_rate_anomaly</strong>.</p>
|
||||
|
||||
<form method="get" action="/ui/baseline">
|
||||
<div class="filters">
|
||||
<div><label>Host</label><input name="host" value="{{index .Filters "host"}}"></div>
|
||||
<div><label>Channel</label><input name="channel" value="{{index .Filters "channel"}}"></div>
|
||||
<div><label>Event ID</label><input name="event_id" value="{{index .Filters "event_id"}}"></div>
|
||||
<div><label>Severity</label><input name="severity" value="{{index .Filters "severity"}}"></div>
|
||||
<div><label>Limit</label><input name="limit" value="{{index .Filters "limit"}}"></div>
|
||||
</div>
|
||||
<button type="submit">Filtern</button>
|
||||
</form>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>Zeit</th>
|
||||
<th>Host</th>
|
||||
<th>Channel</th>
|
||||
<th>EventID</th>
|
||||
<th>Severity</th>
|
||||
<th>Aktuell</th>
|
||||
<th>Baseline</th>
|
||||
<th>Z-Score</th>
|
||||
<th>Samples</th>
|
||||
<th>Bucket</th>
|
||||
</tr>
|
||||
{{range .Anomalies}}
|
||||
<tr>
|
||||
<td>{{fmtTime .CreatedAt}}</td>
|
||||
<td>{{.Hostname}}</td>
|
||||
<td>{{.Channel}}</td>
|
||||
<td><a href="/ui/events?host={{q .Hostname}}&channel={{q .Channel}}&event_id={{.EventID}}">{{.EventID}}</a></td>
|
||||
<td class="sev-{{.Severity}}">{{.Severity}}</td>
|
||||
<td><strong>{{.Count}}</strong></td>
|
||||
<td>{{printf "%.2f" .AvgCount}} ± {{printf "%.2f" .StddevCount}}</td>
|
||||
<td><strong>{{printf "%.2f" .ZScore}}</strong></td>
|
||||
<td>{{.SampleCount}}</td>
|
||||
<td>Tag {{.DayOfWeek}}, Stunde {{.HourOfDay}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</table>
|
||||
{{template "footer" .}}
|
||||
{{end}}
|
||||
|
||||
{{define "events"}}
|
||||
{{template "header" .}}
|
||||
<h1>{{.Title}}</h1>
|
||||
@@ -391,6 +439,14 @@ type Config struct {
|
||||
DetectionsLimit int
|
||||
|
||||
EnrollmentKey string
|
||||
|
||||
BaselineEnabled bool
|
||||
BaselineWindow time.Duration
|
||||
BaselineMinSamples int
|
||||
BaselineMinCount int
|
||||
BaselineMediumZScore float64
|
||||
BaselineHighZScore float64
|
||||
BaselineSuppressFor time.Duration
|
||||
}
|
||||
|
||||
type LogPayload struct {
|
||||
@@ -468,6 +524,11 @@ type detector struct {
|
||||
ruleLastRunGauge *prometheus.GaugeVec
|
||||
ruleRuntimeHist *prometheus.HistogramVec
|
||||
ruleErrorsTotal *prometheus.CounterVec
|
||||
|
||||
baselineCurrentCountGauge *prometheus.GaugeVec
|
||||
baselineAverageGauge *prometheus.GaugeVec
|
||||
baselineStddevGauge *prometheus.GaugeVec
|
||||
baselineSamplesGauge *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
type EventRow struct {
|
||||
@@ -574,6 +635,65 @@ type DynamicRulePageData struct {
|
||||
Rules []DynamicRule
|
||||
}
|
||||
|
||||
// BaselineBucket pairs a Count with a host name, channel, event ID, hour and
// day of week.
// NOTE(review): presumably one aggregation bucket of the baseline window;
// confirm against the code that populates it.
type BaselineBucket struct {
|
||||
Hostname string
|
||||
Channel string
|
||||
EventID uint32
|
||||
Hour int
|
||||
DayOfWeek int
|
||||
Count int
|
||||
}
|
||||
|
||||
// BaselineStat carries an average, an M2 value, a standard deviation and a
// sample count.
// NOTE(review): the field names mirror the avg_count, m2_count, stddev_count
// and sample_count columns of the baseline_event_stats table, so this is
// presumably the statistics part of one such row; confirm against the loader.
type BaselineStat struct {
|
||||
AvgCount float64
|
||||
M2Count float64
|
||||
StddevCount float64
|
||||
SampleCount int
|
||||
}
|
||||
|
||||
// BaselineAnomalyRow is the element type returned by listBaselineAnomalies
// and stored in BaselinePageData.Anomalies.
//
// The "baseline" UI template reads CreatedAt, Hostname, Channel, EventID,
// Severity, Count, AvgCount, StddevCount, ZScore, SampleCount, DayOfWeek and
// HourOfDay from each row.
// NOTE(review): the second field group (Count through WindowMin) has the same
// shape as baselineDetailsJSON and is presumably decoded from it; confirm in
// listBaselineAnomalies.
type BaselineAnomalyRow struct {
|
||||
ID uint64
|
||||
CreatedAt time.Time
|
||||
Hostname string
|
||||
Channel string
|
||||
EventID uint32
|
||||
Severity string
|
||||
Score float64
|
||||
WindowStart time.Time
|
||||
WindowEnd time.Time
|
||||
Summary string
|
||||
|
||||
Count int
|
||||
AvgCount float64
|
||||
StddevCount float64
|
||||
ZScore float64
|
||||
SampleCount int
|
||||
HourOfDay int
|
||||
DayOfWeek int
|
||||
WindowMin int
|
||||
}
|
||||
|
||||
// BaselinePageData bundles a page title, a timestamp, a filter map and a list
// of anomaly rows.
// NOTE(review): Title, Filters and Anomalies match what the "baseline" UI
// template reads (.Title, index .Filters "host"/"channel"/"event_id"/
// "severity"/"limit", range .Anomalies), so this is presumably that
// template's data; confirm in handleUIBaseline.
type BaselinePageData struct {
|
||||
Title string
|
||||
Now time.Time
|
||||
Filters map[string]string
|
||||
Anomalies []BaselineAnomalyRow
|
||||
}
|
||||
|
||||
// baselineDetailsJSON is the JSON shape of baseline anomaly details; the
// struct tags define the snake_case keys used on the wire.
// NOTE(review): presumably stored with a baseline detection and decoded again
// when anomalies are listed; confirm against the writer and reader code.
type baselineDetailsJSON struct {
|
||||
Hostname string `json:"hostname"`
|
||||
Channel string `json:"channel"`
|
||||
EventID uint32 `json:"event_id"`
|
||||
Count int `json:"count"`
|
||||
AvgCount float64 `json:"avg_count"`
|
||||
StddevCount float64 `json:"stddev_count"`
|
||||
ZScore float64 `json:"z_score"`
|
||||
SampleCount int `json:"sample_count"`
|
||||
HourOfDay int `json:"hour_of_day"`
|
||||
DayOfWeek int `json:"day_of_week"`
|
||||
WindowMinutes int `json:"window_minutes"`
|
||||
}
|
||||
|
||||
var (
|
||||
httpRequestsTotal = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{Name: "eventcollector_http_requests_total", Help: "Total HTTP requests."},
|
||||
@@ -671,6 +791,34 @@ func main() {
|
||||
prometheus.CounterOpts{Name: "eventcollector_rule_errors_total", Help: "Rule execution errors."},
|
||||
[]string{"rule"},
|
||||
),
|
||||
baselineCurrentCountGauge: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "eventcollector_baseline_current_count",
|
||||
Help: "Current event count in baseline window.",
|
||||
},
|
||||
[]string{"host", "channel", "event_id"},
|
||||
),
|
||||
baselineAverageGauge: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "eventcollector_baseline_avg_count",
|
||||
Help: "Baseline average event count.",
|
||||
},
|
||||
[]string{"host", "channel", "event_id"},
|
||||
),
|
||||
baselineStddevGauge: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "eventcollector_baseline_stddev_count",
|
||||
Help: "Baseline standard deviation event count.",
|
||||
},
|
||||
[]string{"host", "channel", "event_id"},
|
||||
),
|
||||
baselineSamplesGauge: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "eventcollector_baseline_sample_count",
|
||||
Help: "Baseline sample count.",
|
||||
},
|
||||
[]string{"host", "channel", "event_id"},
|
||||
),
|
||||
}
|
||||
reg.MustRegister(
|
||||
d.lastSeenGauge,
|
||||
@@ -680,6 +828,10 @@ func main() {
|
||||
d.ruleLastRunGauge,
|
||||
d.ruleRuntimeHist,
|
||||
d.ruleErrorsTotal,
|
||||
d.baselineCurrentCountGauge,
|
||||
d.baselineAverageGauge,
|
||||
d.baselineStddevGauge,
|
||||
d.baselineSamplesGauge,
|
||||
)
|
||||
|
||||
s := &server{
|
||||
@@ -726,6 +878,7 @@ func main() {
|
||||
mux.HandleFunc("/ui/rules", s.handleUIRules)
|
||||
mux.HandleFunc("/ui/rules/save", s.handleUIRuleSave)
|
||||
mux.HandleFunc("/ui/rules/toggle", s.handleUIRuleToggle)
|
||||
mux.HandleFunc("/ui/baseline", s.handleUIBaseline)
|
||||
|
||||
httpSrv := &http.Server{
|
||||
Addr: cfg.ListenAddr,
|
||||
@@ -759,6 +912,142 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *server) listBaselineAnomalies(ctx context.Context, host, channel, severity string, eventID uint32, limit int) ([]BaselineAnomalyRow, error) {
|
||||
if limit <= 0 || limit > 1000 {
|
||||
limit = 100
|
||||
}
|
||||
|
||||
query := `
|
||||
SELECT id, severity, hostname, channel_name, event_id, score,
|
||||
window_start, window_end, summary, details_json, created_at
|
||||
FROM detections
|
||||
WHERE rule_name = 'baseline_event_rate_anomaly'
|
||||
`
|
||||
args := make([]any, 0, 8)
|
||||
|
||||
if host != "" {
|
||||
query += ` AND hostname = ?`
|
||||
args = append(args, host)
|
||||
}
|
||||
if channel != "" {
|
||||
query += ` AND channel_name = ?`
|
||||
args = append(args, channel)
|
||||
}
|
||||
if eventID != 0 {
|
||||
query += ` AND event_id = ?`
|
||||
args = append(args, eventID)
|
||||
}
|
||||
if severity != "" {
|
||||
query += ` AND severity = ?`
|
||||
args = append(args, severity)
|
||||
}
|
||||
|
||||
query += ` ORDER BY created_at DESC LIMIT ?`
|
||||
args = append(args, limit)
|
||||
|
||||
rows, err := s.db.QueryContext(ctx, query, args...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
out := make([]BaselineAnomalyRow, 0)
|
||||
|
||||
for rows.Next() {
|
||||
var row BaselineAnomalyRow
|
||||
var detailsRaw []byte
|
||||
|
||||
if err := rows.Scan(
|
||||
&row.ID,
|
||||
&row.Severity,
|
||||
&row.Hostname,
|
||||
&row.Channel,
|
||||
&row.EventID,
|
||||
&row.Score,
|
||||
&row.WindowStart,
|
||||
&row.WindowEnd,
|
||||
&row.Summary,
|
||||
&detailsRaw,
|
||||
&row.CreatedAt,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var details baselineDetailsJSON
|
||||
if err := json.Unmarshal(detailsRaw, &details); err == nil {
|
||||
row.Count = details.Count
|
||||
row.AvgCount = details.AvgCount
|
||||
row.StddevCount = details.StddevCount
|
||||
row.ZScore = details.ZScore
|
||||
row.SampleCount = details.SampleCount
|
||||
row.HourOfDay = details.HourOfDay
|
||||
row.DayOfWeek = details.DayOfWeek
|
||||
row.WindowMin = details.WindowMinutes
|
||||
} else {
|
||||
row.ZScore = row.Score
|
||||
}
|
||||
|
||||
out = append(out, row)
|
||||
}
|
||||
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func (s *server) handleUIBaseline(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
writeError(w, http.StatusMethodNotAllowed, "method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
filters := map[string]string{
|
||||
"host": strings.TrimSpace(r.URL.Query().Get("host")),
|
||||
"channel": strings.TrimSpace(r.URL.Query().Get("channel")),
|
||||
"event_id": strings.TrimSpace(r.URL.Query().Get("event_id")),
|
||||
"severity": strings.TrimSpace(r.URL.Query().Get("severity")),
|
||||
"limit": strings.TrimSpace(r.URL.Query().Get("limit")),
|
||||
}
|
||||
|
||||
limit := 100
|
||||
if filters["limit"] != "" {
|
||||
if n, err := strconv.Atoi(filters["limit"]); err == nil && n > 0 && n <= 1000 {
|
||||
limit = n
|
||||
}
|
||||
}
|
||||
|
||||
var eventID uint32
|
||||
if filters["event_id"] != "" {
|
||||
if n, err := strconv.ParseUint(filters["event_id"], 10, 32); err == nil {
|
||||
eventID = uint32(n)
|
||||
}
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
items, err := s.listBaselineAnomalies(
|
||||
ctx,
|
||||
filters["host"],
|
||||
filters["channel"],
|
||||
filters["severity"],
|
||||
eventID,
|
||||
limit,
|
||||
)
|
||||
if err != nil {
|
||||
s.logger.Printf("ui baseline: %v", err)
|
||||
writeError(w, http.StatusInternalServerError, "internal error")
|
||||
return
|
||||
}
|
||||
|
||||
data := BaselinePageData{
|
||||
Title: "Baseline-Anomalien",
|
||||
Now: time.Now(),
|
||||
Filters: filters,
|
||||
Anomalies: items,
|
||||
}
|
||||
|
||||
s.renderTemplate(w, "baseline", data)
|
||||
}
|
||||
|
||||
func (s *server) listDynamicRules(ctx context.Context) ([]DynamicRule, error) {
|
||||
const q = `
|
||||
SELECT id,
|
||||
@@ -1457,9 +1746,48 @@ func loadConfig() Config {
|
||||
DetectionsLimit: getenvInt("DETECTIONS_LIMIT", 100),
|
||||
|
||||
EnrollmentKey: mustGetenv("ENROLLMENT_KEY"),
|
||||
|
||||
BaselineEnabled: getenvBool("BASELINE_ENABLED", true),
|
||||
BaselineWindow: getenvDuration("BASELINE_WINDOW", 5*time.Minute),
|
||||
BaselineMinSamples: getenvInt("BASELINE_MIN_SAMPLES", 24),
|
||||
BaselineMinCount: getenvInt("BASELINE_MIN_COUNT", 10),
|
||||
BaselineMediumZScore: getenvFloat("BASELINE_MEDIUM_Z", 2.5),
|
||||
BaselineHighZScore: getenvFloat("BASELINE_HIGH_Z", 4.0),
|
||||
BaselineSuppressFor: getenvDuration("BASELINE_SUPPRESS_FOR", 1*time.Hour),
|
||||
}
|
||||
}
|
||||
|
||||
// getenvBool reads the environment variable key as a boolean.
//
// An unset or blank variable yields def. The values 1/true/yes/y/on and
// 0/false/no/n/off are accepted case-insensitively; anything else terminates
// the process via log.Fatalf.
func getenvBool(key string, def bool) bool {
	raw := strings.TrimSpace(os.Getenv(key))
	if raw == "" {
		return def
	}

	normalized := strings.ToLower(raw)

	for _, truthy := range [...]string{"1", "true", "yes", "y", "on"} {
		if normalized == truthy {
			return true
		}
	}
	for _, falsy := range [...]string{"0", "false", "no", "n", "off"} {
		if normalized == falsy {
			return false
		}
	}

	log.Fatalf("invalid bool for %s: %s", key, raw)
	return def // not reached at runtime; log.Fatalf exits the process
}
|
||||
|
||||
// getenvFloat reads the environment variable key as a float64.
//
// An unset or blank variable yields def. A value that does not parse, or that
// parses to NaN, terminates the process via log.Fatalf. NaN is rejected
// because strconv.ParseFloat accepts the literal "NaN" without error, and a
// NaN threshold makes every ordered comparison against it false, which would
// silently disable the threshold check it configures. Infinite values are
// still accepted.
func getenvFloat(key string, def float64) float64 {
	v := strings.TrimSpace(os.Getenv(key))
	if v == "" {
		return def
	}

	f, err := strconv.ParseFloat(v, 64)
	if err != nil {
		log.Fatalf("invalid float for %s: %v", key, err)
	}
	if math.IsNaN(f) {
		log.Fatalf("invalid float for %s: %q is not a number", key, v)
	}

	return f
}
|
||||
|
||||
func (s *server) handleHealthz(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
ingestRejectedTotal.WithLabelValues("method_not_allowed").Inc()
|
||||
@@ -1826,6 +2154,337 @@ func (s *server) runDetectionLoop() {
|
||||
}
|
||||
}
|
||||
|
||||
func (d *detector) runBaselineUpdate(ctx context.Context) error {
|
||||
if !d.cfg.BaselineEnabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
windowEnd := time.Now().UTC()
|
||||
windowStart := windowEnd.Add(-d.cfg.BaselineWindow)
|
||||
|
||||
rows, err := d.db.QueryContext(ctx, `
|
||||
SELECT
|
||||
hostname,
|
||||
channel_name,
|
||||
event_id,
|
||||
HOUR(ts) AS hour_of_day,
|
||||
WEEKDAY(ts) AS day_of_week,
|
||||
COUNT(*) AS cnt
|
||||
FROM event_logs
|
||||
WHERE ts >= ? AND ts < ?
|
||||
GROUP BY hostname, channel_name, event_id, HOUR(ts), WEEKDAY(ts)
|
||||
`, windowStart, windowEnd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var b BaselineBucket
|
||||
if err := rows.Scan(
|
||||
&b.Hostname,
|
||||
&b.Channel,
|
||||
&b.EventID,
|
||||
&b.Hour,
|
||||
&b.DayOfWeek,
|
||||
&b.Count,
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := d.updateBaselineBucket(ctx, b); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return rows.Err()
|
||||
}
|
||||
|
||||
func (d *detector) updateBaselineBucket(ctx context.Context, b BaselineBucket) error {
|
||||
tx, err := d.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = tx.Rollback() }()
|
||||
|
||||
var stat BaselineStat
|
||||
|
||||
err = tx.QueryRowContext(ctx, `
|
||||
SELECT avg_count, m2_count, stddev_count, sample_count
|
||||
FROM baseline_event_stats
|
||||
WHERE hostname = ?
|
||||
AND channel_name = ?
|
||||
AND event_id = ?
|
||||
AND hour_of_day = ?
|
||||
AND day_of_week = ?
|
||||
FOR UPDATE
|
||||
`,
|
||||
b.Hostname,
|
||||
b.Channel,
|
||||
b.EventID,
|
||||
b.Hour,
|
||||
b.DayOfWeek,
|
||||
).Scan(
|
||||
&stat.AvgCount,
|
||||
&stat.M2Count,
|
||||
&stat.StddevCount,
|
||||
&stat.SampleCount,
|
||||
)
|
||||
|
||||
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
||||
return err
|
||||
}
|
||||
|
||||
x := float64(b.Count)
|
||||
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
_, err := tx.ExecContext(ctx, `
|
||||
INSERT INTO baseline_event_stats
|
||||
(hostname, channel_name, event_id, hour_of_day, day_of_week,
|
||||
avg_count, m2_count, stddev_count, sample_count)
|
||||
VALUES (?, ?, ?, ?, ?, ?, 0, 0, 1)
|
||||
`,
|
||||
b.Hostname,
|
||||
b.Channel,
|
||||
b.EventID,
|
||||
b.Hour,
|
||||
b.DayOfWeek,
|
||||
x,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
newSamples := stat.SampleCount + 1
|
||||
delta := x - stat.AvgCount
|
||||
newAvg := stat.AvgCount + delta/float64(newSamples)
|
||||
delta2 := x - newAvg
|
||||
newM2 := stat.M2Count + delta*delta2
|
||||
|
||||
newStddev := 0.0
|
||||
if newSamples > 1 {
|
||||
newStddev = math.Sqrt(newM2 / float64(newSamples-1))
|
||||
}
|
||||
|
||||
_, err = tx.ExecContext(ctx, `
|
||||
UPDATE baseline_event_stats
|
||||
SET avg_count = ?,
|
||||
m2_count = ?,
|
||||
stddev_count = ?,
|
||||
sample_count = ?,
|
||||
last_updated = CURRENT_TIMESTAMP(6)
|
||||
WHERE hostname = ?
|
||||
AND channel_name = ?
|
||||
AND event_id = ?
|
||||
AND hour_of_day = ?
|
||||
AND day_of_week = ?
|
||||
`,
|
||||
newAvg,
|
||||
newM2,
|
||||
newStddev,
|
||||
newSamples,
|
||||
b.Hostname,
|
||||
b.Channel,
|
||||
b.EventID,
|
||||
b.Hour,
|
||||
b.DayOfWeek,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
func (d *detector) runBaselineAnomalyRule(ctx context.Context) error {
|
||||
if !d.cfg.BaselineEnabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
windowEnd := time.Now().UTC()
|
||||
windowStart := windowEnd.Add(-d.cfg.BaselineWindow)
|
||||
|
||||
rows, err := d.db.QueryContext(ctx, `
|
||||
SELECT
|
||||
e.hostname,
|
||||
e.channel_name,
|
||||
e.event_id,
|
||||
HOUR(e.ts) AS hour_of_day,
|
||||
WEEKDAY(e.ts) AS day_of_week,
|
||||
COUNT(*) AS cnt,
|
||||
b.avg_count,
|
||||
b.stddev_count,
|
||||
b.sample_count
|
||||
FROM event_logs e
|
||||
JOIN baseline_event_stats b
|
||||
ON b.hostname = e.hostname
|
||||
AND b.channel_name = e.channel_name
|
||||
AND b.event_id = e.event_id
|
||||
AND b.hour_of_day = HOUR(e.ts)
|
||||
AND b.day_of_week = WEEKDAY(e.ts)
|
||||
WHERE e.ts >= ? AND e.ts < ?
|
||||
GROUP BY
|
||||
e.hostname,
|
||||
e.channel_name,
|
||||
e.event_id,
|
||||
HOUR(e.ts),
|
||||
WEEKDAY(e.ts),
|
||||
b.avg_count,
|
||||
b.stddev_count,
|
||||
b.sample_count
|
||||
`, windowStart, windowEnd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var host string
|
||||
var channel string
|
||||
var eventID uint32
|
||||
var hour int
|
||||
var dayOfWeek int
|
||||
var count int
|
||||
var avg float64
|
||||
var stddev float64
|
||||
var samples int
|
||||
|
||||
if err := rows.Scan(
|
||||
&host,
|
||||
&channel,
|
||||
&eventID,
|
||||
&hour,
|
||||
&dayOfWeek,
|
||||
&count,
|
||||
&avg,
|
||||
&stddev,
|
||||
&samples,
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
eventIDStr := strconv.Itoa(int(eventID))
|
||||
|
||||
d.baselineCurrentCountGauge.WithLabelValues(host, channel, eventIDStr).Set(float64(count))
|
||||
d.baselineAverageGauge.WithLabelValues(host, channel, eventIDStr).Set(avg)
|
||||
d.baselineStddevGauge.WithLabelValues(host, channel, eventIDStr).Set(stddev)
|
||||
d.baselineSamplesGauge.WithLabelValues(host, channel, eventIDStr).Set(float64(samples))
|
||||
|
||||
if samples < d.cfg.BaselineMinSamples {
|
||||
continue
|
||||
}
|
||||
|
||||
if count < d.cfg.BaselineMinCount {
|
||||
continue
|
||||
}
|
||||
|
||||
if stddev <= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
z := (float64(count) - avg) / stddev
|
||||
|
||||
if z < d.cfg.BaselineMediumZScore {
|
||||
continue
|
||||
}
|
||||
|
||||
severity := "medium"
|
||||
if z >= d.cfg.BaselineHighZScore {
|
||||
severity = "high"
|
||||
}
|
||||
|
||||
suppressed, err := d.isBaselineSuppressed(ctx, host, channel, eventID, windowEnd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if suppressed {
|
||||
continue
|
||||
}
|
||||
|
||||
score := z
|
||||
|
||||
created, err := d.insertDetection(ctx, Detection{
|
||||
RuleName: "baseline_event_rate_anomaly",
|
||||
Severity: severity,
|
||||
Hostname: host,
|
||||
Channel: channel,
|
||||
EventID: eventID,
|
||||
Score: score,
|
||||
WindowStart: windowStart,
|
||||
WindowEnd: windowEnd,
|
||||
Summary: fmt.Sprintf(
|
||||
"Baseline-Anomalie auf %s: %s EventID %d kam %d-mal in %d Minuten, normal %.2f ± %.2f, z=%.2f",
|
||||
host,
|
||||
channel,
|
||||
eventID,
|
||||
count,
|
||||
int(d.cfg.BaselineWindow.Minutes()),
|
||||
avg,
|
||||
stddev,
|
||||
z,
|
||||
),
|
||||
Details: mustJSON(map[string]any{
|
||||
"hostname": host,
|
||||
"channel": channel,
|
||||
"event_id": eventID,
|
||||
"count": count,
|
||||
"avg_count": avg,
|
||||
"stddev_count": stddev,
|
||||
"z_score": z,
|
||||
"sample_count": samples,
|
||||
"hour_of_day": hour,
|
||||
"day_of_week": dayOfWeek,
|
||||
"window_minutes": int(d.cfg.BaselineWindow.Minutes()),
|
||||
"min_samples": d.cfg.BaselineMinSamples,
|
||||
"medium_z": d.cfg.BaselineMediumZScore,
|
||||
"high_z": d.cfg.BaselineHighZScore,
|
||||
}),
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if created {
|
||||
d.detectionHitsTotal.WithLabelValues("baseline_event_rate_anomaly", severity).Inc()
|
||||
d.anomalyScoreGauge.WithLabelValues(host, "baseline_event_rate_anomaly").Set(score)
|
||||
}
|
||||
}
|
||||
|
||||
return rows.Err()
|
||||
}
|
||||
|
||||
func (d *detector) isBaselineSuppressed(ctx context.Context, hostname, channel string, eventID uint32, now time.Time) (bool, error) {
|
||||
if d.cfg.BaselineSuppressFor <= 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
since := now.UTC().Add(-d.cfg.BaselineSuppressFor)
|
||||
|
||||
var count int
|
||||
err := d.db.QueryRowContext(ctx, `
|
||||
SELECT COUNT(*)
|
||||
FROM detections
|
||||
WHERE rule_name = 'baseline_event_rate_anomaly'
|
||||
AND hostname = ?
|
||||
AND channel_name = ?
|
||||
AND event_id = ?
|
||||
AND created_at >= ?
|
||||
`,
|
||||
hostname,
|
||||
channel,
|
||||
eventID,
|
||||
since,
|
||||
).Scan(&count)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return count > 0, nil
|
||||
}
|
||||
|
||||
func (d *detector) runDynamicRules(ctx context.Context) error {
|
||||
rows, err := d.db.QueryContext(ctx, `
|
||||
SELECT id, name, description, severity, channel, event_ids,
|
||||
@@ -2287,6 +2946,9 @@ func (s *server) runDetectionsOnce() {
|
||||
{"success_after_failures", s.detector.runSuccessAfterFailuresRule},
|
||||
{"new_source_ip_for_user", s.detector.runNewSourceIPForUserRule},
|
||||
{"dynamic_rules", s.detector.runDynamicRules},
|
||||
|
||||
{"baseline_anomaly", s.detector.runBaselineAnomalyRule},
|
||||
{"baseline_update", s.detector.runBaselineUpdate},
|
||||
}
|
||||
|
||||
for _, rule := range rules {
|
||||
|
||||
Reference in New Issue
Block a user