feat: add observability stack and background task infrastructure
Add OpenTelemetry instrumentation with distributed tracing and metrics:
- Structured JSON logging with trace context correlation
- Auto-instrumentation for FastAPI, asyncpg, httpx, redis
- OTLP exporter for traces and Prometheus metrics endpoint

Implement Celery worker and notification task system:
- Celery app with Redis/SQS broker support and configurable queues
- Notification tasks for incident fan-out, webhooks, and escalations
- Pluggable TaskQueue abstraction with in-memory driver for testing

Add Grafana observability stack (Loki, Tempo, Prometheus, Grafana):
- OpenTelemetry Collector for receiving OTLP traces and logs
- Tempo for distributed tracing backend
- Loki for log aggregation with Promtail DaemonSet
- Prometheus for metrics scraping with RBAC configuration
- Grafana with pre-provisioned datasources and API overview dashboard
- Helm templates for all observability components

Enhance application infrastructure:
- Global exception handlers with structured ErrorResponse schema
- Request logging middleware with timing metrics
- Health check updated to verify task queue connectivity
- Non-root user in Dockerfile for security
- Init containers in Helm deployments for dependency ordering
- Production Helm values with autoscaling and retention policies
This commit is contained in:
@@ -4,7 +4,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import AsyncGenerator, Callable
|
||||
from typing import AsyncGenerator, Callable, Generator
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import asyncpg
|
||||
@@ -15,8 +15,11 @@ import pytest
|
||||
# Test-environment defaults. A variable that is already present in the
# process environment keeps its existing value; only missing ones are filled.
os.environ.update(
    {
        name: fallback
        for name, fallback in {
            "DATABASE_URL": "postgresql://incidentops:incidentops@localhost:5432/incidentops_test",
            "JWT_SECRET_KEY": "test-secret-key-for-testing-only",
            "REDIS_URL": "redis://localhost:6379/1",
            "TASK_QUEUE_DRIVER": "inmemory",
            "TASK_QUEUE_BROKER_URL": "redis://localhost:6379/2",
        }.items()
        if name not in os.environ
    }
)
|
||||
|
||||
from app.main import app
|
||||
from app.taskqueue import task_queue
|
||||
|
||||
|
||||
# Module-level setup: create database and run migrations once
|
||||
@@ -163,3 +166,14 @@ async def db_admin(clean_database: None) -> AsyncGenerator[asyncpg.Connection, N
|
||||
yield conn
|
||||
finally:
|
||||
await conn.close()
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_task_queue() -> Generator[None, None, None]:
    """Wipe the shared task queue's state on both sides of every test.

    Applied automatically to each test (``autouse=True``). The queue is
    reset once before the test body runs and once more afterwards. A
    ``task_queue`` object that exposes no ``reset`` attribute is left
    untouched.
    """

    def _clear() -> None:
        # Re-checked on every call so setup and teardown each make their
        # own decision about whether a reset hook is available.
        if not hasattr(task_queue, "reset"):
            return
        task_queue.reset()

    _clear()
    yield
    _clear()
|
||||
|
||||
Reference in New Issue
Block a user