---
# ModelMesh proxy configuration — multi-provider pool with auto-failover
#
# Copy this file: cp modelmesh.example.yaml modelmesh.yaml
# Then fill in your API keys in .env (or set them as environment variables).
#
# The proxy exposes a standard OpenAI REST API on the configured port.
# Internally, ModelMesh routes to the best available provider in the pool,
# automatically failing over when a provider is down or rate-limited.

# ── Secret Store ────────────────────────────────────────────────────────
# Resolves ${secrets:VAR_NAME} references from environment variables.
secrets:
  store: modelmesh.env.v1

# ── Providers ───────────────────────────────────────────────────────────
# Enable only the providers you have API keys for.
# At least one provider is required.
providers:
  openai.llm.v1:
    # Quoted so the templated value survives any YAML tooling untouched.
    api_key: "${secrets:OPENAI_API_KEY}"
    # budget:
    #   daily_limit: 10.00  # optional: cap daily spend

  # Uncomment if you have an Anthropic key:
  # anthropic.claude.v1:
  #   api_key: "${secrets:ANTHROPIC_API_KEY}"
  #   budget:
  #     daily_limit: 10.00

  # Uncomment if you have a Groq key:
  # groq.api.v1:
  #   api_key: "${secrets:GROQ_API_KEY}"

# ── Models ──────────────────────────────────────────────────────────────
# Register models with their capabilities and constraints.
models:
  gpt-4o-mini:
    provider: openai.llm.v1
    capabilities:
      - generation.text-generation.chat-completion
    delivery:
      synchronous: true
      streaming: true
    features:
      tool_calling: true
      structured_output: true
      json_mode: true
      system_prompt: true
    constraints:
      context_window: 128000
      max_output_tokens: 16384

  # claude-3-5-haiku:
  #   provider: anthropic.claude.v1
  #   capabilities:
  #     - generation.text-generation.chat-completion
  #   delivery:
  #     synchronous: true
  #     streaming: true
  #   features:
  #     tool_calling: true
  #     system_prompt: true
  #   constraints:
  #     context_window: 200000
  #     max_output_tokens: 8192

  # llama-3.3-70b:
  #   provider: groq.api.v1
  #   capabilities:
  #     - generation.text-generation.chat-completion
  #   delivery:
  #     synchronous: true
  #     streaming: true
  #   features:
  #     tool_calling: true
  #     system_prompt: true
  #   constraints:
  #     context_window: 131072
  #     max_output_tokens: 32768

# ── Pools ───────────────────────────────────────────────────────────────
# Pools group models by capability. The rotation strategy decides which
# model to use and when to rotate on failure.
pools:
  text-generation:
    strategy: modelmesh.stick-until-failure.v1
    capability: generation.text-generation
    # on_budget_exceeded: rotate  # "rotate" | "error" (default: "error")