{
  "title": "IncidentOps API Overview",
  "uid": "incidentops-api",
  "tags": ["incidentops", "api"],
  "timezone": "browser",
  "editable": true,
  "panels": [
    {
      "id": 1,
      "title": "Request Rate",
      "type": "timeseries",
      "gridPos": {"h": 8, "w": 8, "x": 0, "y": 0},
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "sum(rate(http_server_request_duration_seconds_count{job=\"incidentops-api\"}[1m]))",
          "legendFormat": "Requests/sec",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {"mode": "palette-classic"},
          "unit": "reqps"
        }
      }
    },
    {
      "id": 2,
      "title": "Request Duration (p50, p95, p99)",
      "type": "timeseries",
      "gridPos": {"h": 8, "w": 8, "x": 8, "y": 0},
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "histogram_quantile(0.50, sum(rate(http_server_request_duration_seconds_bucket{job=\"incidentops-api\"}[5m])) by (le))",
          "legendFormat": "p50",
          "refId": "A"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "histogram_quantile(0.95, sum(rate(http_server_request_duration_seconds_bucket{job=\"incidentops-api\"}[5m])) by (le))",
          "legendFormat": "p95",
          "refId": "B"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "histogram_quantile(0.99, sum(rate(http_server_request_duration_seconds_bucket{job=\"incidentops-api\"}[5m])) by (le))",
          "legendFormat": "p99",
          "refId": "C"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {"mode": "palette-classic"},
          "unit": "s"
        }
      }
    },
    {
      "id": 3,
      "title": "Error Rate",
      "description": "Share of requests answered with a 5xx status code, as a percentage. Reads 0 when no 5xx responses have been recorded.",
      "type": "timeseries",
      "gridPos": {"h": 8, "w": 8, "x": 16, "y": 0},
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "(sum(rate(http_server_request_duration_seconds_count{job=\"incidentops-api\", http_status_code=~\"5..\"}[1m])) or vector(0)) / sum(rate(http_server_request_duration_seconds_count{job=\"incidentops-api\"}[1m])) * 100",
          "legendFormat": "Error %",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {"fixedColor": "red", "mode": "fixed"},
          "unit": "percent",
          "min": 0,
          "max": 100
        }
      }
    },
    {
      "id": 4,
      "title": "Requests by Status Code",
      "type": "timeseries",
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "sum by (http_status_code) (rate(http_server_request_duration_seconds_count{job=\"incidentops-api\"}[1m]))",
          "legendFormat": "{{http_status_code}}",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {"mode": "palette-classic"},
          "unit": "reqps"
        }
      }
    },
    {
      "id": 5,
      "title": "Requests by Endpoint",
      "type": "timeseries",
      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "sum by (http_route) (rate(http_server_request_duration_seconds_count{job=\"incidentops-api\"}[1m]))",
          "legendFormat": "{{http_route}}",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {"mode": "palette-classic"},
          "unit": "reqps"
        }
      }
    },
    {
      "id": 6,
      "title": "System CPU Usage",
      "type": "gauge",
      "gridPos": {"h": 6, "w": 6, "x": 0, "y": 16},
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "avg(system_cpu_utilization{job=\"incidentops-api\"}) * 100",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {"mode": "thresholds"},
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "green", "value": null},
              {"color": "yellow", "value": 60},
              {"color": "red", "value": 80}
            ]
          },
          "unit": "percent",
          "min": 0,
          "max": 100
        }
      }
    },
    {
      "id": 7,
      "title": "Memory Usage",
      "description": "Process RSS converted from bytes to MiB (bytes / 1024 / 1024). Thresholds are expressed in MiB.",
      "type": "gauge",
      "gridPos": {"h": 6, "w": 6, "x": 6, "y": 16},
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "process_runtime_cpython_memory_bytes{job=\"incidentops-api\", type=\"rss\"} / 1024 / 1024",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {"mode": "thresholds"},
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "green", "value": null},
              {"color": "yellow", "value": 256},
              {"color": "red", "value": 512}
            ]
          },
          "unit": "mbytes"
        }
      }
    },
    {
      "id": 8,
      "title": "Active Threads",
      "type": "stat",
      "gridPos": {"h": 6, "w": 6, "x": 12, "y": 16},
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "process_runtime_cpython_thread_count{job=\"incidentops-api\"}",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {"mode": "thresholds"},
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "green", "value": null},
              {"color": "yellow", "value": 50},
              {"color": "red", "value": 100}
            ]
          }
        }
      }
    },
    {
      "id": 9,
      "title": "GC Collections",
      "type": "stat",
      "gridPos": {"h": 6, "w": 6, "x": 18, "y": 16},
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "sum(rate(process_runtime_cpython_gc_count{job=\"incidentops-api\"}[5m]))",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": {"mode": "thresholds"},
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {"color": "green", "value": null}
            ]
          },
          "unit": "cps"
        }
      }
    },
    {
      "id": 10,
      "title": "Recent Logs",
      "type": "logs",
      "gridPos": {"h": 10, "w": 24, "x": 0, "y": 22},
      "targets": [
        {
          "datasource": {"type": "loki", "uid": "loki"},
          "expr": "{service_name=\"incidentops-api\"} | json",
          "refId": "A"
        }
      ],
      "options": {
        "showTime": true,
        "showLabels": true,
        "wrapLogMessage": true,
        "enableLogDetails": true,
        "sortOrder": "Descending"
      }
    },
    {
      "id": 11,
      "title": "Error Logs",
      "type": "logs",
      "gridPos": {"h": 8, "w": 24, "x": 0, "y": 32},
      "targets": [
        {
          "datasource": {"type": "loki", "uid": "loki"},
          "expr": "{service_name=\"incidentops-api\"} |= \"ERROR\" | json",
          "refId": "A"
        }
      ],
      "options": {
        "showTime": true,
        "showLabels": true,
        "wrapLogMessage": true,
        "enableLogDetails": true,
        "sortOrder": "Descending"
      }
    },
    {
      "id": 12,
      "title": "Recent Traces",
      "description": "Tempo search results for traces whose resource attribute service.name is incidentops-api, listed as a table.",
      "type": "table",
      "gridPos": {"h": 10, "w": 24, "x": 0, "y": 40},
      "targets": [
        {
          "datasource": {"type": "tempo", "uid": "tempo"},
          "queryType": "traceqlSearch",
          "filters": [
            {
              "id": "service-name",
              "operator": "=",
              "scope": "resource",
              "tag": "service.name",
              "value": ["incidentops-api"]
            }
          ],
          "refId": "A"
        }
      ]
    }
  ],
  "schemaVersion": 38,
  "version": 2
}