humanloop-python/tests/integration/conftest.py at 92d2cd5d07bded3e301955aaa509f0709b9c553f · humanloop/humanloop-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import io
import os
import uuid
from contextlib import contextmanager, redirect_stdout
from dataclasses import dataclass
from typing import Any, ContextManager, Generator, TextIO

import dotenv
import pytest
from humanloop.client import Humanloop
from humanloop.requests.prompt_kernel_request import PromptKernelRequestParams


@dataclass
class TestIdentifiers:
    file_id: str
    file_path: str


@pytest.fixture()
def capture_stdout() -> ContextManager[TextIO]:
    @contextmanager
    def _context_manager():
        f = io.StringIO()
        with redirect_stdout(f):
            yield f

    return _context_manager  # type: ignore [return-value]


@pytest.fixture(scope="session")
def openai_key() -> str:
    dotenv.load_dotenv()
    if not os.getenv("OPENAI_API_KEY"):
        pytest.fail("OPENAI_API_KEY is not set for integration tests")
    return os.getenv("OPENAI_API_KEY")  # type: ignore [return-value]


@pytest.fixture(scope="session")
def humanloop_test_client() -> Humanloop:
    dotenv.load_dotenv()
    if not os.getenv("HUMANLOOP_API_KEY"):
        pytest.fail("HUMANLOOP_API_KEY is not set for integration tests")
    return Humanloop(api_key=os.getenv("HUMANLOOP_API_KEY"))  # type: ignore [return-value]


@pytest.fixture(scope="function")
def sdk_test_dir(humanloop_test_client: Humanloop) -> Generator[str, None, None]:
    path = f"SDK_INTEGRATION_TEST_{uuid.uuid4()}"
    try:
        response = humanloop_test_client.directories.create(path=path)
        yield response.path
        humanloop_test_client.directories.delete(id=response.id)
    except Exception as e:
        pytest.fail(f"Failed to create directory {path}: {e}")


@pytest.fixture(scope="function")
def test_prompt_config() -> PromptKernelRequestParams:
    return {
        "provider": "openai",
        "model": "gpt-4o-mini",
        "temperature": 0.5,
        "template": [
            {
                "role": "system",
                "content": "You are a helpful assistant. You must answer the user's question truthfully and at the level of a 5th grader.",
            },
            {
                "role": "user",
                "content": "{{question}}",
            },
        ],
    }


@pytest.fixture(scope="function")
def eval_dataset(humanloop_test_client: Humanloop, sdk_test_dir: str) -> Generator[TestIdentifiers, None, None]:
    dataset_path = f"{sdk_test_dir}/eval_dataset"
    try:
        response = humanloop_test_client.datasets.upsert(
            path=dataset_path,
            datapoints=[
                {
                    "inputs": {
                        "question": "What is the capital of the France?",
                    },
                },
                {
                    "inputs": {
                        "question": "What is the capital of the Germany?",
                    },
                },
                {
                    "inputs": {
                        "question": "What is 2+2?",
                    },
                },
            ],
        )
        yield TestIdentifiers(file_id=response.id, file_path=response.path)
        humanloop_test_client.datasets.delete(id=response.id)
    except Exception as e:
        pytest.fail(f"Failed to create dataset {dataset_path}: {e}")


@pytest.fixture(scope="function")
def eval_prompt(
    humanloop_test_client: Humanloop, sdk_test_dir: str, openai_key: str, test_prompt_config: dict[str, Any]
) -> Generator[TestIdentifiers, None, None]:
    prompt_path = f"{sdk_test_dir}/eval_prompt"
    try:
        response = humanloop_test_client.prompts.upsert(
            path=prompt_path,
            **test_prompt_config,
        )
        yield TestIdentifiers(file_id=response.id, file_path=response.path)
        humanloop_test_client.prompts.delete(id=response.id)
    except Exception as e:
        pytest.fail(f"Failed to create prompt {prompt_path}: {e}")


@pytest.fixture(scope="function")
def prompt(
    humanloop_test_client: Humanloop, sdk_test_dir: str, openai_key: str, test_prompt_config: dict[str, Any]
) -> Generator[TestIdentifiers, None, None]:
    prompt_path = f"{sdk_test_dir}/prompt"
    try:
        response = humanloop_test_client.prompts.upsert(
            path=prompt_path,
            **test_prompt_config,
        )
        yield TestIdentifiers(file_id=response.id, file_path=response.path)
        humanloop_test_client.prompts.delete(id=response.id)
    except Exception as e:
        pytest.fail(f"Failed to create prompt {prompt_path}: {e}")


@pytest.fixture(scope="function")
def output_not_null_evaluator(
    humanloop_test_client: Humanloop, sdk_test_dir: str
) -> Generator[TestIdentifiers, None, None]:
    evaluator_path = f"{sdk_test_dir}/output_not_null_evaluator"
    try:
        response = humanloop_test_client.evaluators.upsert(
            path=evaluator_path,
            spec={
                "arguments_type": "target_required",
                "return_type": "boolean",
                "code": """
def output_not_null(log: dict) -> bool:
    return log["output"] is not None
                """,
                "evaluator_type": "python",
            },
        )
        yield TestIdentifiers(file_id=response.id, file_path=response.path)
        humanloop_test_client.evaluators.delete(id=response.id)
    except Exception as e:
        pytest.fail(f"Failed to create evaluator {evaluator_path}: {e}")


@pytest.fixture(scope="function")
def id_for_staging_environment(humanloop_test_client: Humanloop, eval_prompt: TestIdentifiers) -> str:
    response = humanloop_test_client.prompts.list_environments(id=eval_prompt.file_id)
    for environment in response:
        if environment.name == "staging":
            return environment.id
    pytest.fail("Staging environment not found")