parakeet-api/test_memory_optimization.py at main · tulas75/parakeet-api · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python3
"""
Test memory optimization features

This script verifies that memory optimization features are working properly.
"""

import os
import sys
import json
import time
import requests
from pathlib import Path

def _print_health_report(health_data):
    """Print the memory-optimization details found in a health payload.

    Args:
        health_data: Decoded JSON body of the health endpoint. The code
            reads the optional 'optimization', 'model', 'gpu' and 'system'
            sections from it with dict-style access.

    Raises:
        AttributeError, TypeError, ValueError: If the payload does not have
            the structure this report reads (for example a section is not a
            dict, or a memory figure cannot be formatted as a float).
    """
    optimization = health_data.get('optimization', {})

    required_fields = (
        'aggressive_memory_cleanup',
        'idle_timeout_minutes',
        'idle_memory_cleanup_interval',
    )
    for field in required_fields:
        if field in optimization:
            print(f"  ✅ {field}: {optimization[field]}")
        else:
            print(f"  ❌ {field}: Missing")

    print(f"  📊 Current idle status: {health_data.get('model', {}).get('idle_status', 'unknown')}")

    if 'gpu' in health_data:
        gpu_info = health_data['gpu']
        # GPU memory is only reported when the service says a GPU is available.
        if gpu_info.get('available'):
            memory_gb = gpu_info.get('memory_allocated_gb', 0)
            print(f"  📊 GPU memory usage: {memory_gb:.2f}GB")

    if 'system' in health_data:
        system_info = health_data['system']
        memory_total = system_info.get('memory_total_gb', 0)
        memory_percent = system_info.get('memory_usage_percent', 0)
        # The payload carries total + percentage; derive the absolute usage.
        memory_used = memory_total * memory_percent / 100
        print(f"  📊 System memory usage: {memory_used:.2f}GB / {memory_total:.2f}GB ({memory_percent:.1f}%)")


def _check_health_endpoint(url):
    """Query the health endpoint at *url* and print what it reports.

    Each failure mode gets its own message so that a malformed response is
    not mistaken for an unreachable service. Nothing is raised to the caller:
    this is a best-effort diagnostic.

    Args:
        url: Full URL of the service's health endpoint.
    """
    try:
        response = requests.get(url, timeout=10)
    except Exception as e:
        # Deliberately broad, but scoped to the network call only: whatever
        # prevents the request from completing is reported instead of
        # aborting the remaining output.
        print(f"  ⚠️ Unable to connect to service: {e}")
        print("  💡 Tip: Make sure the service is running at localhost:5092")
        return

    if response.status_code != 200:
        print(f"  ❌ Health check failed: HTTP {response.status_code}")
        return

    try:
        health_data = response.json()
    except ValueError as e:
        # The service answered, but the body is not valid JSON.
        print(f"  ❌ Health check returned invalid JSON: {e}")
        return

    try:
        _print_health_report(health_data)
    except (AttributeError, TypeError, ValueError) as e:
        # The service answered with JSON whose shape this script cannot read.
        print(f"  ❌ Unexpected health check payload: {e}")


def test_memory_optimization():
    """Print a three-part report on the memory optimization features.

    1. Echo the memory-related environment variables (with their fallback
       values when unset).
    2. Query the health endpoint at localhost:5092 and print the
       optimization settings and memory figures it reports.
    3. Print a description of the default configuration.

    The function only prints; it makes no assertions and returns None, and
    problems talking to the service are reported on stdout rather than
    raised.
    """
    print("🧪 Testing memory optimization features")
    print("=" * 50)

    # Test 1: Check basic configuration
    print("📋 Test 1: Checking basic configuration")

    # Check key configuration items
    idle_timeout = os.environ.get('IDLE_TIMEOUT_MINUTES', '30')
    aggressive_cleanup = os.environ.get('AGGRESSIVE_MEMORY_CLEANUP', 'true')
    cleanup_interval = os.environ.get('IDLE_MEMORY_CLEANUP_INTERVAL', '120')

    print(f"  ✅ Model idle timeout: {idle_timeout} minutes")
    print(f"  ✅ Aggressive memory cleanup: {aggressive_cleanup}")
    print(f"  ✅ Cleanup interval: {cleanup_interval} seconds")

    print()

    # Test 2: Verify health check endpoint
    print("📋 Test 2: Verifying health check endpoint")
    _check_health_endpoint('http://localhost:5092/health')

    print()

    # Test 3: Show default configuration
    print("📋 Test 3: Default memory optimization configuration")
    print("  💡 The system uses reasonable default configurations, no manual environment variable setting required:")
    print("     - Model automatically unloads after 30 minutes of inactivity")
    print("     - Basic memory cleanup executes after each batch processing")
    print("     - Regular cleanup of invalid memory usage during idle periods")
    print("     - Forced cleanup only triggered at extremely high memory usage")
    print()
    print("  📝 These settings can effectively reduce memory usage during idle periods")

    print()
    print("🎯 Test complete!")
    print("💡 The system is optimized for out-of-the-box usage, no additional configuration required")

# Run the report when this file is executed directly as a script; importing
# the module does not trigger it.
if __name__ == "__main__":
    test_memory_optimization()