edgee-ai · SachaMorard · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
@@ -101,7 +101,7 @@
                   "compression": {
                     "saved_tokens": 450,
                     "cost_savings": 27000,
-                    "reduction": 48,
+                    "reduction": 48.99884991374353,
                     "time_ms": 12
                   }
                 }
@@ -1378,11 +1378,11 @@
             "example": 27000
           },
           "reduction": {
-            "type": "integer",
+            "type": "number",
             "description": "Percentage reduction in input tokens. For example, `48` means the compressed prompt was 48% smaller than the original.",
             "minimum": 0,
             "maximum": 100,
-            "example": 48
+            "example": 48.99884991374353
           },
           "time_ms": {
             "type": "integer",

@@ -218,9 +218,9 @@ const response = await edgee.send({
 
 // Compression details
 if (response.compression) {
-  console.log(response.compression.input_tokens); // Original token count
-  console.log(response.usage.prompt_tokens); // After compression
   console.log(response.compression.saved_tokens); // Tokens saved
-  console.log(`${(response.compression.rate * 100).toFixed(1)}%`); // Compression rate (e.g., 61.0%)
+  console.log(response.compression.cost_savings); // Cost savings in micro-units (e.g. 27000 = $0.027)
+  console.log(response.compression.reduction); // Percentage reduction (e.g. 48 = 48%, may be fractional)
+  console.log(response.compression.time_ms); // Time taken for compression in milliseconds
 }
 ```
@@ -223,9 +223,10 @@ Token compression metrics (when compression is applied):
 
 | Property | Type | Description |
 |----------|------|-------------|
-| `InputTokens` | `int` | Original number of input tokens before compression |
 | `SavedTokens` | `int` | Number of tokens saved by compression |
-| `Rate` | `float64` | Compression rate as a decimal (0-1). For example, `0.61` means 61% compression |
+| `CostSavings` | `int` | Estimated cost savings in micro-units (e.g. 27000 = $0.027) |
+| `Reduction` | `float64` | Percentage reduction (e.g. 48 = 48%, may be fractional) |
+| `TimeMs` | `int` | Time taken for compression in milliseconds |
 
 **Example - Accessing Compression Metrics:**
 
@@ -242,9 +243,10 @@ if err != nil {
 }
 
 if response.Compression != nil {
-    fmt.Printf("Original input tokens: %d\n", response.Compression.InputTokens)
-    fmt.Printf("Tokens saved: %d\n", response.Compression.SavedTokens)
-    fmt.Printf("Compression rate: %.1f%%\n", response.Compression.Rate * 100)
+    fmt.Printf("Saved tokens: %d\n", response.Compression.SavedTokens)
+    fmt.Printf("Reduction: %.1f%%\n", response.Compression.Reduction)
+    fmt.Printf("Cost savings: $%.3f\n", float64(response.Compression.CostSavings)/1000000)
+    fmt.Printf("Time: %d ms\n", response.Compression.TimeMs)
 }
 ```
 

@@ -192,9 +192,10 @@ Token compression metrics (when compression is applied):
 
 | Property | Type | Description |
 |----------|------|-------------|
-| `input_tokens` | `int` | Original number of input tokens before compression |
 | `saved_tokens` | `int` | Number of tokens saved by compression |
-| `rate` | `float` | Compression rate as a decimal (0-1). For example, `0.61` means 61% compression |
+| `cost_savings` | `int` | Estimated cost savings in micro-units (e.g. 27000 = $0.027) |
+| `reduction` | `float` | Percentage reduction (e.g. 48 = 48%, may be fractional) |
+| `time_ms` | `int` | Time taken for compression in milliseconds |
 
 **Example - Accessing Compression Metrics:**
 
@@ -211,9 +212,10 @@ response = edgee.send(
 )
 
 if response.compression:
-    print(f"Original input tokens: {response.compression.input_tokens}")
-    print(f"Tokens saved: {response.compression.saved_tokens}")
-    print(f"Compression rate: {response.compression.rate * 100:.1f}%")
+    print(f"Saved tokens: {response.compression.saved_tokens}")
+    print(f"Reduction: {response.compression.reduction}%")
+    print(f"Cost savings: ${response.compression.cost_savings / 1_000_000:.3f}")
+    print(f"Time: {response.compression.time_ms} ms")
 ```
 
 <Note>

@@ -206,26 +206,28 @@ Token compression metrics (when compression is applied):
 
 | Property | Type | Description |
 |----------|------|-------------|
-| `input_tokens` | `u32` | Original number of input tokens before compression |
 | `saved_tokens` | `u32` | Number of tokens saved by compression |
-| `rate` | `f64` | Compression rate as a decimal (0-1). For example, `0.61` means 61% compression |
+| `cost_savings` | `u64` | Estimated cost savings in micro-units (e.g. 27000 = $0.027) |
+| `reduction` | `f64` | Percentage reduction (e.g. 48 = 48%, may be fractional) |
+| `time_ms` | `u32` | Time taken for compression in milliseconds |
 
 **Example - Accessing Compression Metrics:**
 
 ```rust
 let input = InputObject::new(vec![
     Message::user("Analyze this long document with lots of context...")
 ])
-.with_enable_compression(true)
+.with_compression(true)
 .with_compression_rate(0.8); // Target 80% compression
 
 let response = client.send("gpt-5.2", input).await?;
 println!("{}", response.text().unwrap_or(""));
 
 if let Some(compression) = &response.compression {
-    println!("Original input tokens: {}", compression.input_tokens);
-    println!("Tokens saved: {}", compression.saved_tokens);
-    println!("Compression rate: {:.1}%", compression.rate * 100.0);
+    println!("Saved tokens: {}", compression.saved_tokens);
+    println!("Reduction: {:.1}%", compression.reduction);
+    println!("Cost savings: ${:.3}", compression.cost_savings as f64 / 1_000_000.0);
+    println!("Time: {} ms", compression.time_ms);
 }
 ```
 

@@ -197,9 +197,10 @@ Token compression metrics (when compression is applied):
 
 | Property | Type | Description |
 |----------|------|-------------|
-| `input_tokens` | `number` | Original number of input tokens before compression |
 | `saved_tokens` | `number` | Number of tokens saved by compression |
-| `rate` | `number` | Compression rate as a decimal (0-1). For example, `0.61` means 61% compression |
+| `cost_savings` | `number` | Estimated cost savings in micro-units (e.g. 27000 = $0.027) |
+| `reduction` | `number` | Percentage reduction (e.g. 48 = 48%, may be fractional) |
+| `time_ms` | `number` | Time taken for compression in milliseconds |
 
 **Example - Accessing Compression Metrics:**
 
@@ -216,9 +217,10 @@ const response = await edgee.send({
 });
 
 if (response.compression) {
-  console.log(`Original input tokens: ${response.compression.input_tokens}`);
-  console.log(`Tokens saved: ${response.compression.saved_tokens}`);
-  console.log(`Compression rate: ${(response.compression.rate * 100).toFixed(1)}%`);
+  console.log(`Saved tokens: ${response.compression.saved_tokens}`);
+  console.log(`Reduction: ${response.compression.reduction}%`);
+  console.log(`Cost savings: $${(response.compression.cost_savings / 1_000_000).toFixed(3)}`);
+  console.log(`Time: ${response.compression.time_ms} ms`);
 }
 ```