diff --git a/k8s/apps/furumi/kustomization.yaml b/k8s/apps/furumi/kustomization.yaml index 8755c8e..e2a23ad 100644 --- a/k8s/apps/furumi/kustomization.yaml +++ b/k8s/apps/furumi/kustomization.yaml @@ -7,3 +7,4 @@ resources: - external-secrets.yaml - ingress.yaml - deployment.yaml + - servicemonitor.yaml diff --git a/k8s/apps/furumi/servicemonitor.yaml b/k8s/apps/furumi/servicemonitor.yaml new file mode 100644 index 0000000..7f23059 --- /dev/null +++ b/k8s/apps/furumi/servicemonitor.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: furumi-player-metrics + labels: + app: furumi-player + release: prometheus +spec: + selector: + matchLabels: + app: furumi-player + endpoints: + - port: http + path: /metrics + interval: 30s + scrapeTimeout: 10s + honorLabels: true + namespaceSelector: + matchNames: + - furumi diff --git a/k8s/core/prom-stack/dashboards/furumi-dashboard-cm.yaml b/k8s/core/prom-stack/dashboards/furumi-dashboard-cm.yaml new file mode 100644 index 0000000..c165c43 --- /dev/null +++ b/k8s/core/prom-stack/dashboards/furumi-dashboard-cm.yaml @@ -0,0 +1,491 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: furumi-dashboard + labels: + grafana_dashboard: "1" +data: + furumi.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "id": 1, + "title": "Build Version", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "expr": "max by (version) (furumusic_build_info{namespace=~\"$namespace\"})", + "legendFormat": "{{version}}", + "refId": "A" + } + ], + "options": { "colorMode": "none", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, "textMode": "name" } + }, + { + "id": 2, + "title": "HTTP RPS", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 }, + "fieldConfig": { "defaults": { "unit": "reqps", "decimals": 2 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(furumusic_http_requests_total{namespace=~\"$namespace\"}[$__rate_interval]))", "refId": "A" } + ], + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, "textMode": "auto" } + }, + { + "id": 3, + "title": "5xx Error Ratio", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 }, + "fieldConfig": { "defaults": { "unit": "percentunit", "decimals": 2, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 0.01 }, { "color": "red", "value": 0.05 } ] } }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(furumusic_http_requests_total{namespace=~\"$namespace\",status=~\"5..\"}[$__rate_interval])) / clamp_min(sum(rate(furumusic_http_requests_total{namespace=~\"$namespace\"}[$__rate_interval])), 0.001)", "refId": "A" } + ], + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, "textMode": "auto" } + }, + { + "id": 4, + "title": "HTTP p95 Latency", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 }, + "fieldConfig": { "defaults": { "unit": "s", "decimals": 3 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum by (le) (rate(furumusic_http_request_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])))", "refId": "A" } + ], + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, "textMode": "auto" } + }, + { + "id": 5, + "title": "Active Users 15m", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 }, + "fieldConfig": { "defaults": { "unit": "short", "decimals": 0 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(furumusic_active_users{namespace=~\"$namespace\",window=\"15m\"})", "refId": "A" } + ], + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, "textMode": "auto" } + }, + { + "id": 6, + "title": "Stream Throughput", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 20, "y": 0 }, + "fieldConfig": { "defaults": { "unit": "Bps", "decimals": 1 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(furumusic_stream_bytes_total{namespace=~\"$namespace\"}[$__rate_interval]))", "refId": "A" } + ], + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, "textMode": "auto" } + }, + { "id": 10, "title": "HTTP", "type": "row", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 }, "collapsed": false, "panels": [] }, + { + "id": 11, + "title": "Request Rate by Route and Status", + "type": "timeseries", + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 }, + "fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (route, status) (rate(furumusic_http_requests_total{namespace=~\"$namespace\",route=~\"$route\"}[$__rate_interval]))", "legendFormat": "{{route}} {{status}}", "refId": "A" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 12, + "title": "Request Duration Quantiles", + "type": "timeseries", + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 }, + "fieldConfig": { "defaults": { "unit": "s", "decimals": 3 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum by (le) (rate(furumusic_http_request_duration_seconds_bucket{namespace=~\"$namespace\",route=~\"$route\"}[$__rate_interval])))", "legendFormat": "p50", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum by (le) (rate(furumusic_http_request_duration_seconds_bucket{namespace=~\"$namespace\",route=~\"$route\"}[$__rate_interval])))", "legendFormat": "p95", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.99, sum by (le) (rate(furumusic_http_request_duration_seconds_bucket{namespace=~\"$namespace\",route=~\"$route\"}[$__rate_interval])))", "legendFormat": "p99", "refId": "C" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 13, + "title": "In-flight Requests", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 13 }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (route) (furumusic_http_in_flight_requests{namespace=~\"$namespace\",route=~\"$route\"})", "legendFormat": "{{route}}", "refId": "A" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 14, + "title": "HTTP Body Throughput", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 13 }, + "fieldConfig": { "defaults": { "unit": "Bps" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(furumusic_http_request_body_bytes_total{namespace=~\"$namespace\",route=~\"$route\"}[$__rate_interval]))", "legendFormat": "request", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(furumusic_http_response_body_bytes_total{namespace=~\"$namespace\",route=~\"$route\"}[$__rate_interval]))", "legendFormat": "response", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 15, + "title": "Status Code Mix", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 13 }, + "fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (status) (rate(furumusic_http_requests_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "{{status}}", "refId": "A" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { "id": 20, "title": "Auth and Users", "type": "row", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 }, "collapsed": false, "panels": [] }, + { + "id": 21, + "title": "Users by Role", + "type": "bargauge", + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 21 }, + "fieldConfig": { "defaults": { "unit": "short", "decimals": 0 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (role) (furumusic_users_total{namespace=~\"$namespace\"})", "legendFormat": "{{role}}", "refId": "A" } + ], + "options": { "displayMode": "gradient", "orientation": "horizontal", "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, "showUnfilled": true } + }, + { + "id": 22, + "title": "Active Users", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 21 }, + "fieldConfig": { "defaults": { "unit": "short", "decimals": 0 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (window) (furumusic_active_users{namespace=~\"$namespace\"})", "legendFormat": "{{window}}", "refId": "A" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 23, + "title": "Auth Events", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 21 }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (method, outcome, reason) (rate(furumusic_auth_login_attempts_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "login {{method}} {{outcome}} {{reason}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (method) (rate(furumusic_auth_sessions_created_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "session {{method}}", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (kind) (rate(furumusic_auth_denied_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "denied {{kind}}", "refId": "C" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { "id": 30, "title": "Playback and Streaming", "type": "row", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 28 }, "collapsed": false, "panels": [] }, + { + "id": 31, + "title": "Listens Rate", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 29 }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (completed) (rate(furumusic_listens_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "completed={{completed}}", "refId": "A" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 32, + "title": "Listen Quality", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 29 }, + "fieldConfig": { "defaults": { "unit": "percentunit", "decimals": 2 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(furumusic_listens_total{namespace=~\"$namespace\",completed=\"true\"}[$__rate_interval])) / clamp_min(sum(rate(furumusic_listens_total{namespace=~\"$namespace\"}[$__rate_interval])), 0.001)", "legendFormat": "completion ratio", "refId": "A" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 33, + "title": "Streaming", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 29 }, + "fieldConfig": { "defaults": { "unit": "Bps" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(furumusic_stream_bytes_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "bytes", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (range) (rate(furumusic_stream_requests_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "requests range={{range}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 34, + "title": "Listened Hours and History", + "type": "timeseries", + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 36 }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(increase(furumusic_listened_seconds_total{namespace=~\"$namespace\"}[$__range])) / 3600", "legendFormat": "listened hours in range", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(furumusic_play_history_total{namespace=~\"$namespace\"})", "legendFormat": "history rows", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { "id": 40, "title": "Library and Storage", "type": "row", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 43 }, "collapsed": false, "panels": [] }, + { + "id": 41, + "title": "Library Inventory", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 44 }, + "fieldConfig": { "defaults": { "unit": "short", "decimals": 0 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(furumusic_library_tracks_total{namespace=~\"$namespace\"})", "legendFormat": "tracks", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(furumusic_library_releases_total{namespace=~\"$namespace\"})", "legendFormat": "releases", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(furumusic_library_artists_total{namespace=~\"$namespace\"})", "legendFormat": "artists", "refId": "C" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(furumusic_library_playlists_total{namespace=~\"$namespace\"})", "legendFormat": "playlists", "refId": "D" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 42, + "title": "Media Files and Bytes by Type", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 44 }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (type) (furumusic_media_files_total{namespace=~\"$namespace\"})", "legendFormat": "files {{type}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (type) (furumusic_media_file_bytes_total{namespace=~\"$namespace\"})", "legendFormat": "bytes {{type}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 43, + "title": "Storage Used Ratio", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 44 }, + "fieldConfig": { "defaults": { "unit": "percentunit", "decimals": 2, "min": 0, "max": 1 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "1 - (sum by (path_kind) (furumusic_storage_free_bytes{namespace=~\"$namespace\"}) / sum by (path_kind) (furumusic_storage_total_bytes{namespace=~\"$namespace\"}))", "legendFormat": "{{path_kind}}", "refId": "A" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { "id": 50, "title": "AI Agent", "type": "row", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 51 }, "collapsed": false, "panels": [] }, + { + "id": 51, + "title": "Agent Queue and Reviews", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 52 }, + "fieldConfig": { "defaults": { "unit": "short", "decimals": 0 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (status) (furumusic_agent_queue_depth{namespace=~\"$namespace\"})", "legendFormat": "queue {{status}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (status) (furumusic_agent_reviews_total{namespace=~\"$namespace\"})", "legendFormat": "reviews {{status}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 52, + "title": "Agent Processing and Failures", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 52 }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (outcome, decision) (rate(furumusic_agent_files_processed_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "processed {{outcome}} {{decision}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (stage) (rate(furumusic_agent_failed_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "failed {{stage}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 53, + "title": "Agent Confidence", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 52 }, + "fieldConfig": { "defaults": { "unit": "percentunit", "decimals": 2, "min": 0, "max": 1 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.50, sum by (le) (rate(furumusic_agent_confidence_bucket{namespace=~\"$namespace\"}[$__rate_interval])))", "legendFormat": "p50", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum by (le) (rate(furumusic_agent_confidence_bucket{namespace=~\"$namespace\"}[$__rate_interval])))", "legendFormat": "p95", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 54, + "title": "Discover Runs and Duration", + "type": "timeseries", + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 59 }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (outcome) (rate(furumusic_agent_discover_runs_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "runs {{outcome}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum by (le, outcome) (rate(furumusic_agent_discover_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])))", "legendFormat": "p95 {{outcome}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 55, + "title": "Discover File Flow", + "type": "timeseries", + "gridPos": { "h": 7, "w": 12, "x": 12, "y": 59 }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(furumusic_agent_discover_files_seen_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "seen", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(furumusic_agent_discover_files_queued_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "queued", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (reason) (rate(furumusic_agent_discover_files_skipped_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "skipped {{reason}}", "refId": "C" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 56, + "title": "RAG and LLM Requests", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 66 }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (kind, outcome) (rate(furumusic_agent_rag_queries_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "rag {{kind}} {{outcome}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (model, outcome) (rate(furumusic_agent_llm_requests_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "llm {{model}} {{outcome}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 57, + "title": "RAG and LLM Latency p95", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 66 }, + "fieldConfig": { "defaults": { "unit": "s", "decimals": 2 }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum by (le, kind, outcome) (rate(furumusic_agent_rag_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])))", "legendFormat": "rag {{kind}} {{outcome}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum by (le, model, outcome) (rate(furumusic_agent_llm_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])))", "legendFormat": "llm {{model}} {{outcome}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 58, + "title": "LLM Tokens and Batch Pressure", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 66 }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (model, type) (rate(furumusic_agent_llm_tokens_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "tokens {{model}} {{type}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (reason) (rate(furumusic_agent_llm_batch_splits_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "splits {{reason}}", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (model) (rate(furumusic_agent_llm_parse_failures_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "parse failures {{model}}", "refId": "C" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 59, + "title": "Cover Pipeline", + "type": "timeseries", + "gridPos": { "h": 7, "w": 12, "x": 0, "y": 73 }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (source, outcome) (rate(furumusic_agent_cover_lookup_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "lookup {{source}} {{outcome}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (variant, outcome) (rate(furumusic_agent_cover_variant_generation_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "variant {{variant}} {{outcome}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { "id": 70, "title": "Scheduler and Torrents", "type": "row", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 80 }, "collapsed": false, "panels": [] }, + { + "id": 71, + "title": "Scheduler Jobs", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 81 }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (job) (furumusic_scheduler_job_running{namespace=~\"$namespace\"})", "legendFormat": "running {{job}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (job) (furumusic_scheduler_job_enabled{namespace=~\"$namespace\"})", "legendFormat": "enabled {{job}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 72, + "title": "Scheduler Runs and Duration p95", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 81 }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (job, trigger, outcome) (rate(furumusic_scheduler_job_runs_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "{{job}} {{trigger}} {{outcome}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "histogram_quantile(0.95, sum by (le, job, trigger, outcome) (rate(furumusic_scheduler_job_duration_seconds_bucket{namespace=~\"$namespace\"}[$__rate_interval])))", "legendFormat": "p95 {{job}} {{outcome}}", "refId": "B" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + }, + { + "id": 73, + "title": "Torrents", + "type": "timeseries", + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 81 }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (status) (furumusic_torrent_sessions_total{namespace=~\"$namespace\"})", "legendFormat": "sessions {{status}}", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (outcome) (rate(furumusic_torrent_downloads_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "downloads {{outcome}}", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (outcome) (rate(furumusic_torrent_selected_bytes_total{namespace=~\"$namespace\"}[$__rate_interval]))", "legendFormat": "bytes {{outcome}}", "refId": "C" } + ], + "options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } } + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": [ "furumi", "furumusic", "music" ], + "templating": { + "list": [ + { + "current": { "selected": false, "text": "Prometheus", "value": "Prometheus" }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", + "current": { "selected": true, "text": "All", "value": "$__all" }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "definition": "label_values(furumusic_build_info, namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "namespace", + "options": [], + "query": { "query": "label_values(furumusic_build_info, namespace)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "current": { "selected": true, "text": "All", "value": "$__all" }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "definition": "label_values(furumusic_http_requests_total{namespace=~\"$namespace\"}, route)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "route", + "options": [], + "query": { "query": "label_values(furumusic_http_requests_total{namespace=~\"$namespace\"}, route)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { "from": "now-6h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Furumi Music", + "uid": "furumi-music", + "version": 1 + } diff --git a/k8s/core/prom-stack/kustomization.yaml b/k8s/core/prom-stack/kustomization.yaml index 32e6f64..40253e2 100644 --- a/k8s/core/prom-stack/kustomization.yaml +++ b/k8s/core/prom-stack/kustomization.yaml @@ -8,6 +8,7 @@ resources: - alertmanager-config.yaml - dashboards/telemt-dashboard-cm.yaml - dashboards/auth-proxy-dashboard-cm.yaml + - dashboards/furumi-dashboard-cm.yaml helmCharts: - name: kube-prometheus-stack