Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions api-reference/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"compression": {
"saved_tokens": 450,
"cost_savings": 27000,
"reduction": 48,
"reduction": 48.99884991374353,
"time_ms": 12
}
}
Expand Down Expand Up @@ -1378,11 +1378,11 @@
"example": 27000
},
"reduction": {
"type": "integer",
"type": "number",
"description": "Percentage reduction in input tokens. For example, `48` means the compressed prompt was 48% smaller than the original.",
"minimum": 0,
"maximum": 100,
"example": 48
"example": 48.99884991374353
},
"time_ms": {
"type": "integer",
Expand Down
6 changes: 3 additions & 3 deletions features/observability.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,9 @@ const response = await edgee.send({

// Compression details
if (response.compression) {
console.log(response.compression.input_tokens); // Original token count
console.log(response.usage.prompt_tokens); // After compression
console.log(response.compression.saved_tokens); // Tokens saved
console.log(`${(response.compression.rate * 100).toFixed(1)}%`); // Compression rate (e.g., 61.0%)
console.log(response.compression.cost_savings); // Cost savings in micro-units (e.g. 27000 = $0.027)
console.log(response.compression.reduction); // Percentage reduction (e.g. 48 = 48%)
console.log(response.compression.time_ms); // Time taken for compression in milliseconds
}
```
12 changes: 7 additions & 5 deletions sdk/go/send.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,10 @@ Token compression metrics (when compression is applied):

| Property | Type | Description |
|----------|------|-------------|
| `InputTokens` | `int` | Original number of input tokens before compression |
| `SavedTokens` | `int` | Number of tokens saved by compression |
| `Rate` | `float64` | Compression rate as a decimal (0-1). For example, `0.61` means 61% compression |
| `CostSavings` | `int` | Estimated cost savings in micro-units (e.g. 27000 = $0.027) |
| `Reduction` | `float64` | Percentage reduction (e.g. 48 = 48%, may be fractional) |
| `TimeMs` | `int` | Time taken for compression in milliseconds |

**Example - Accessing Compression Metrics:**

Expand All @@ -242,9 +243,10 @@ if err != nil {
}

if response.Compression != nil {
fmt.Printf("Original input tokens: %d\n", response.Compression.InputTokens)
fmt.Printf("Tokens saved: %d\n", response.Compression.SavedTokens)
fmt.Printf("Compression rate: %.1f%%\n", response.Compression.Rate * 100)
fmt.Printf("Saved tokens: %d\n", response.Compression.SavedTokens)
fmt.Printf("Reduction: %.1f%%\n", response.Compression.Reduction)
Comment thread
SachaMorard marked this conversation as resolved.
fmt.Printf("Cost savings: $%.3f\n", float64(response.Compression.CostSavings)/1000000)
fmt.Printf("Time: %d ms\n", response.Compression.TimeMs)
}
```

Expand Down
12 changes: 7 additions & 5 deletions sdk/python/send.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,10 @@ Token compression metrics (when compression is applied):

| Property | Type | Description |
|----------|------|-------------|
| `input_tokens` | `int` | Original number of input tokens before compression |
| `saved_tokens` | `int` | Number of tokens saved by compression |
| `rate` | `float` | Compression rate as a decimal (0-1). For example, `0.61` means 61% compression |
| `cost_savings` | `int` | Estimated cost savings in micro-units (e.g. 27000 = $0.027) |
| `reduction` | `float` | Percentage reduction (e.g. 48 = 48%, may be fractional) |
| `time_ms` | `int` | Time taken for compression in milliseconds |

**Example - Accessing Compression Metrics:**

Expand All @@ -211,9 +212,10 @@ response = edgee.send(
)

if response.compression:
print(f"Original input tokens: {response.compression.input_tokens}")
print(f"Tokens saved: {response.compression.saved_tokens}")
print(f"Compression rate: {response.compression.rate * 100:.1f}%")
print(f"Saved tokens: {response.compression.saved_tokens}")
print(f"Reduction: {response.compression.reduction}%")
print(f"Cost savings: ${response.compression.cost_savings / 1_000_000:.3f}")
print(f"Time: {response.compression.time_ms} ms")
```

<Note>
Expand Down
14 changes: 8 additions & 6 deletions sdk/rust/send.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -206,26 +206,28 @@ Token compression metrics (when compression is applied):

| Property | Type | Description |
|----------|------|-------------|
| `input_tokens` | `u32` | Original number of input tokens before compression |
| `saved_tokens` | `u32` | Number of tokens saved by compression |
| `rate` | `f64` | Compression rate as a decimal (0-1). For example, `0.61` means 61% compression |
| `cost_savings` | `u64` | Estimated cost savings in micro-units (e.g. 27000 = $0.027) |
| `reduction` | `f64` | Percentage reduction (e.g. 48 = 48%, may be fractional) |
| `time_ms` | `u32` | Time taken for compression in milliseconds |

**Example - Accessing Compression Metrics:**

```rust
let input = InputObject::new(vec![
Message::user("Analyze this long document with lots of context...")
])
.with_enable_compression(true)
.with_compression(true)
.with_compression_rate(0.8); // Target 80% compression

let response = client.send("gpt-5.2", input).await?;
println!("{}", response.text().unwrap_or(""));

if let Some(compression) = &response.compression {
println!("Original input tokens: {}", compression.input_tokens);
println!("Tokens saved: {}", compression.saved_tokens);
println!("Compression rate: {:.1}%", compression.rate * 100.0);
println!("Saved tokens: {}", compression.saved_tokens);
println!("Reduction: {:.1}%", compression.reduction);
println!("Cost savings: ${:.3}", compression.cost_savings as f64 / 1_000_000.0);
println!("Time: {} ms", compression.time_ms);
}
```

Expand Down
12 changes: 7 additions & 5 deletions sdk/typescript/send.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,10 @@ Token compression metrics (when compression is applied):

| Property | Type | Description |
|----------|------|-------------|
| `input_tokens` | `number` | Original number of input tokens before compression |
| `saved_tokens` | `number` | Number of tokens saved by compression |
| `rate` | `number` | Compression rate as a decimal (0-1). For example, `0.61` means 61% compression |
| `cost_savings` | `number` | Estimated cost savings in micro-units (e.g. 27000 = $0.027) |
| `reduction` | `number` | Percentage reduction (e.g. 48 = 48%, may be fractional) |
| `time_ms` | `number` | Time taken for compression in milliseconds |

**Example - Accessing Compression Metrics:**

Expand All @@ -216,9 +217,10 @@ const response = await edgee.send({
});

if (response.compression) {
console.log(`Original input tokens: ${response.compression.input_tokens}`);
console.log(`Tokens saved: ${response.compression.saved_tokens}`);
console.log(`Compression rate: ${(response.compression.rate * 100).toFixed(1)}%`);
console.log(`Saved tokens: ${response.compression.saved_tokens}`);
console.log(`Reduction: ${response.compression.reduction}%`);
console.log(`Cost savings: $${(response.compression.cost_savings / 1_000_000).toFixed(3)}`);
console.log(`Time: ${response.compression.time_ms} ms`);
}
```

Expand Down