forked from vllm-project/semantic-router
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
129 lines (121 loc) · 3.46 KB
/
docker-compose.yml
File metadata and controls
129 lines (121 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
services:
  # Semantic Router External Processor (ExtProc) service.
  # Built locally; exposes gRPC on 50051 for Envoy's ext_proc filter.
  semantic-router:
    build:
      context: .
      dockerfile: Dockerfile.extproc
    container_name: semantic-router
    ports:
      - "50051:50051"
    volumes:
      # Config and model artifacts are mounted read-only from the repo.
      - ./config:/app/config:ro
      - ./models:/app/models:ro
    environment:
      - LD_LIBRARY_PATH=/app/lib
      # Overridable config path; falls back to the mounted default.
      - CONFIG_FILE=${CONFIG_FILE:-/app/config/config.yaml}
    networks:
      - semantic-network
    healthcheck:
      # NOTE(review): probes 8080 inside the container, which is not in the
      # published ports above — presumably an internal-only health endpoint;
      # confirm against Dockerfile.extproc.
      test: ["CMD", "curl", "-f", "localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 5
      # Generous start period: model loading can take a while.
      start_period: 30s

  # Envoy proxy fronting the router; waits for semantic-router to be healthy.
  envoy:
    image: envoyproxy/envoy:v1.31.7
    container_name: envoy-proxy
    ports:
      - "8801:8801"    # Main proxy port
      - "19000:19000"  # Admin interface
    volumes:
      - ./config/envoy-docker.yaml:/etc/envoy/envoy.yaml:ro
    # Verbose component logging for the ext_proc/router/http subsystems.
    command: ["/usr/local/bin/envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "ext_proc:trace,router:trace,http:trace"]
    depends_on:
      semantic-router:
        condition: service_healthy
    networks:
      - semantic-network
    healthcheck:
      # /ready is Envoy's admin readiness endpoint on the admin port.
      # NOTE(review): assumes curl is present in the envoyproxy/envoy image —
      # verify, or switch to a wget/CMD-SHELL probe if it is not.
      test: ["CMD", "curl", "-f", "http://localhost:19000/ready"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s

  # Mock vLLM backend — only started with the "testing" profile.
  mock-vllm:
    build:
      context: ./tools/mock-vllm
      dockerfile: Dockerfile
    container_name: mock-vllm
    profiles: ["testing"]
    ports:
      - "8000:8000"
    networks:
      # Static address so Envoy cluster config can target it directly.
      semantic-network:
        ipv4_address: 172.28.0.10
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost:8000/health"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 5s

  # Prometheus + Grafana observability stack.
  prometheus:
    image: prom/prometheus:v2.53.0
    container_name: prometheus
    volumes:
      - ./tools/observability/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
      - prometheus-data:/prometheus
    command:
      # Explicit --config.file is required: the mounted file uses .yaml,
      # not Prometheus' default /etc/prometheus/prometheus.yml.
      - --config.file=/etc/prometheus/prometheus.yaml
      - --storage.tsdb.retention.time=15d
    environment:
      # NOTE(review): presumably consumed by the scrape config via env
      # substitution — confirm against tools/observability/prometheus.yaml.
      - ROUTER_TARGET=semantic-router:9190
    ports:
      - "9090:9090"
    networks:
      - semantic-network

  grafana:
    image: grafana/grafana:11.5.1
    container_name: grafana
    environment:
      # Default dev credentials — do not reuse in production.
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - PROMETHEUS_URL=prometheus:9090
    ports:
      - "3000:3000"
    volumes:
      # Provisioned datasource, dashboard provider, and dashboard JSON.
      - ./tools/observability/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro
      - ./tools/observability/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro
      - ./tools/observability/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro
      - grafana-data:/var/lib/grafana
    networks:
      - semantic-network
    depends_on:
      - prometheus

  # Lightweight real-model test backend; started with either listed profile.
  llm-katan:
    build:
      context: ./e2e-tests/llm-katan
      dockerfile: Dockerfile
    container_name: llm-katan
    profiles: ["testing", "llm-katan"]
    ports:
      # Host 8002 -> container 8000 to avoid clashing with mock-vllm.
      - "8002:8000"
    environment:
      # Optional HF token; empty default keeps anonymous access working.
      - HUGGINGFACE_HUB_TOKEN=${HUGGINGFACE_HUB_TOKEN:-}
    networks:
      - semantic-network
    command: ["llm-katan", "--model", "Qwen/Qwen3-0.6B", "--host", "0.0.0.0", "--port", "8000"]

networks:
  semantic-network:
    driver: bridge
    ipam:
      config:
        # Fixed subnet so mock-vllm's static ipv4_address stays valid.
        - subnet: 172.28.0.0/16

volumes:
  # NOTE(review): models-cache is declared but not mounted by any service
  # above — either dead config or used by an override file; confirm.
  models-cache:
    driver: local
  prometheus-data:
  grafana-data: