// File-viewer listing metadata: TypeScript, 119 lines, 4.6 KiB.
import { Callout } from "../components/Callout";
|
|
import { CodeBlock } from "../components/CodeBlock";
|
|
|
|
/**
 * Static documentation page describing the per-provider circuit breaker
 * and the fallback chain.
 *
 * Sections rendered, in order: breaker states, configuration, fallback
 * chain, status endpoint example, Prometheus alert rule, and the
 * development-mode degradation note. The component takes no props and
 * holds no state; every code sample is a literal string handed to
 * `CodeBlock`.
 *
 * @returns The JSX tree for the guide page.
 */
export function CircuitBreakerGuide() {
  // One card per breaker state. `color` and `bg` are utility class names
  // (Tailwind-style) that are appended to the card's className below.
  const breakerStates = [
    {
      state: "Closed",
      color: "border-green-400",
      bg: "bg-green-50 dark:bg-green-950/30",
      desc: "Normal operation. Requests are forwarded to the provider. Failures are counted.",
    },
    {
      state: "Open",
      color: "border-red-400",
      bg: "bg-red-50 dark:bg-red-950/30",
      desc: "Provider bypassed. All requests use the fallback chain. Stays open for open_ttl seconds.",
    },
    {
      state: "Half-Open",
      color: "border-amber-400",
      bg: "bg-amber-50 dark:bg-amber-950/30",
      desc: "Testing if provider has recovered. One probe request sent. Success → Closed; Failure → Open.",
    },
  ];

  return (
    <div>
      <h1 id="circuit-breaker">Circuit Breaker &amp; Failover</h1>
      <p>
        Veylant IA includes a per-provider circuit breaker that prevents cascading failures when an
        LLM provider is degraded or unreachable.
      </p>

      <h2 id="states">Circuit Breaker States</h2>
      <div className="grid grid-cols-1 sm:grid-cols-3 gap-4 my-4">
        {breakerStates.map((item) => (
          // The state name is unique within the list, so it doubles as the React key.
          <div key={item.state} className={`rounded-lg border-l-4 p-4 ${item.color} ${item.bg}`}>
            <h3 className="font-semibold text-sm mb-2">{item.state}</h3>
            <p className="text-sm text-muted-foreground leading-relaxed">{item.desc}</p>
          </div>
        ))}
      </div>

      <h2 id="configuration">Configuration</h2>
      {/* Sample text starts at column 0 so the YAML nesting is exactly what is shown. */}
      <CodeBlock
        language="yaml"
        code={`circuit_breaker:
  threshold: 5      # consecutive failures to open the breaker
  open_ttl: 60s     # how long to stay open before half-open probe`}
      />

      <Callout type="tip" title="Per-provider isolation">
        Each provider has an independent circuit breaker. A failing Azure deployment does not affect
        OpenAI or Anthropic calls.
      </Callout>

      <h2 id="fallback">Fallback Chain</h2>
      <p>
        When the primary provider&apos;s circuit is open, the routing engine uses the{" "}
        <code>fallback_providers</code> array from the matched routing rule:
      </p>
      <CodeBlock
        language="json"
        code={`{
  "provider": "azure",
  "fallback_providers": ["anthropic", "openai"],
  "conditions": [{"field": "user.department", "operator": "eq", "value": "legal"}]
}
// If azure is open → try anthropic → if anthropic is open → try openai → if all fail → 503`}
      />

      <h2 id="check-status">Checking Status</h2>
      {/* `\\` inside the template literal emits a single backslash (shell line continuation). */}
      <CodeBlock
        language="bash"
        code={`curl http://localhost:8090/v1/admin/providers/status \\
  -H "Authorization: Bearer $TOKEN"

# Response:
{
  "data": [
    {"provider": "openai", "state": "closed", "failures": 0, "last_failure": null},
    {"provider": "anthropic", "state": "closed", "failures": 0, "last_failure": null},
    {"provider": "azure", "state": "open", "failures": 5, "last_failure": "2026-01-15T14:30:00Z"},
    {"provider": "mistral", "state": "half-open", "failures": 3, "last_failure": "2026-01-15T14:28:00Z"},
    {"provider": "ollama", "state": "closed", "failures": 0, "last_failure": null}
  ]
}`}
      />

      <h2 id="prometheus">Prometheus Alert</h2>
      <p>
        The <code>CircuitBreakerOpen</code> alert fires when any provider is in the open state:
      </p>
      {/* `{{ $labels.provider }}` is alert-template text; it is not a `${}` interpolation. */}
      <CodeBlock
        language="yaml"
        code={`- alert: CircuitBreakerOpen
  expr: veylant_circuit_breaker_state > 0
  for: 0m
  labels:
    severity: warning
  annotations:
    summary: "Provider {{ $labels.provider }} circuit breaker is open"
    description: "The circuit breaker for {{ $labels.provider }} has opened after repeated failures."`}
      />

      <h2 id="graceful-degradation">Development Mode Degradation</h2>
      <Callout type="info">
        In <code>server.env=development</code>, Veylant IA degrades gracefully if services are
        unreachable:
        <ul className="mt-2 space-y-1">
          <li><strong>Keycloak unreachable</strong> → MockVerifier (auth bypassed)</li>
          <li><strong>PostgreSQL unreachable</strong> → routing disabled, feature flags use in-memory defaults</li>
          <li><strong>ClickHouse unreachable</strong> → audit logging disabled</li>
          <li><strong>PII service unreachable</strong> → PII skipped if <code>fail_open=true</code></li>
        </ul>
        In <code>production</code> mode, any of the above causes a fatal startup error.
      </Callout>
    </div>
  );
}
|