Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions exp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ GIT_LFS_SKIP_SMUDGE=1 pip install -e .[bench]

Then run the following command to set up the MLE-Bench:
```shell
mle-exp init
mle bench init
```

## Benchmarking (Lite)
Expand All @@ -26,11 +26,11 @@ The dataset will be downloaded to the system default cache directory.
Prepare the lite dataset ([15 smaller datasets](https://github.com/openai/mle-bench?tab=readme-ov-file#lite-evaluation)):
```shell
# Prepare lite dataset (smaller version of the dataset)
mle-exp prepare --lite
mle bench prepare --lite
```
Alternatively, you can prepare the dataset for a specific competition:
```shell
mle-exp prepare -c <competition-id>
mle bench prepare -c <competition-id>
```
### Run MLE Agent (WIP)
```shell
Expand All @@ -39,7 +39,7 @@ mle kaggle <competition-id>

### Grade submission
```shell
mle-exp grade-sample <PATH_TO_SUBMISSION> <competition-id>
mle bench grade-sample <PATH_TO_SUBMISSION> <competition-id>
```

## Advanced (Run MLE-Agent on the Full Dataset)
Expand All @@ -48,7 +48,7 @@ mle-exp grade-sample <PATH_TO_SUBMISSION> <competition-id>

### Prepare full 75 datasets
```shell
mle-exp prepare --all
mle bench prepare --all
```
### Run MLE Agent (WIP)
```shell
Expand All @@ -57,5 +57,5 @@ mle kaggle <competition-id>

### Grade submission
```shell
mle-exp grade-sample <PATH_TO_SUBMISSION> <competition-id>
mle bench grade-sample <PATH_TO_SUBMISSION> <competition-id>
```
10 changes: 5 additions & 5 deletions exp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@ def wrapper(ctx, *args, **kwargs):
)
@click.pass_context
@click.version_option(version=mle.__version__)
def cli(ctx: click.Context):
def bench(ctx: click.Context):
"""
MLE-Exp: The Experimental CLI tool for MLE-agent.
"""
ctx.obj = {"registry": registry}


@cli.command(
@bench.command(
name="init",
short_help="Initialize MLE-bench",
)
Expand All @@ -86,7 +86,7 @@ def init(force: bool):
sys.exit(init_api(force))


@cli.command("prepare", help="Download and prepare one or more competitions.")
@bench.command("prepare", help="Download and prepare one or more competitions.")
@click.option(
"-c",
"--competition-id",
Expand Down Expand Up @@ -182,7 +182,7 @@ def _read_text_utf8(self, *args, **kwargs): # type: ignore[override]
sys.exit(1)


@cli.command(
@bench.command(
"grade",
help="Grade a submission covering *all* eval competitions (JSONL format).",
)
Expand Down Expand Up @@ -256,7 +256,7 @@ def grade(
sys.exit(1)


@cli.command(
@bench.command(
"grade-sample",
help="Grade a *single* competition CSV submission inside the eval.",
)
Expand Down
9 changes: 9 additions & 0 deletions mle/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,3 +553,12 @@ def traces(component, limit, full_output):
console.print("[yellow]Tip: Use --full-output flag to see complete trace data[/yellow]")

memory.close()


# Experimental commands (optional).
# The `bench` command group is defined in `exp.cli`. The import is allowed to
# fail so that the core CLI keeps working when that package cannot be imported.
try:
    from exp.cli import bench
except ImportError:
    # Best-effort: skip the experimental commands when the import fails.
    # `exp = None` is retained from the original code for backward
    # compatibility; nothing in the visible code reads it.
    exp = None
else:
    # Register outside the `try` body so that only the import failure is
    # tolerated; an error raised during registration is not silently hidden.
    cli.add_command(bench, name='bench')
12 changes: 4 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "mle-agent"
version = "0.4.3"
description = "MLE-agent: An agent to automate your MLE processes"
readme = "README.md"
requires-python = ">=3.8.1"
requires-python = ">=3.9"
license = {text = "Apache-2.0"}
authors = [
{ name = "Yizheng Huang", email = "huangyz0918@gmail.com" },
Expand All @@ -28,8 +28,7 @@ dependencies = [
"rich>=13.6.0",
"click>=7.1.1",
"tantivy==0.24.0",
"py7zr~=1.0 ; python_version >= '3.9'",
"py7zr<0.20 ; python_version < '3.9'",
"py7zr~=1.0",
"openai~=1.70.0",
"pyyaml~=6.0",
"kaggle>=1.5.12",
Expand All @@ -38,16 +37,14 @@ dependencies = [
"requests~=2.32.3",
"GitPython~=3.1",
"questionary~=1.10.0",
"pandas>=2.2.2 ; python_version >= '3.9'",
"pandas==2.0.3 ; python_version < '3.9'",
"pandas>=2.2.2",
"tavily-python~=0.6.0",
"langfuse~=2.36.2",
"google-api-python-client~=2.143.0",
"google-auth-httplib2~=0.2.0",
"google-auth-oauthlib~=1.2.1",
"google-genai~=1.25.0",
"lancedb==0.15.0 ; python_version >= '3.9'",
"lancedb==0.6.13 ; python_version < '3.9'",
"lancedb==0.15.0",
"tree-sitter>=0.21.3",
"mem0ai~=0.1.114",
"pip>=23.3.1",
Expand All @@ -66,7 +63,6 @@ Download = "https://github.com/MLSysOps/MLE-agent/archive/refs/heads/main.zip"
[project.scripts]
mle-agent = "mle.cli:cli"
mle = "mle.cli:cli"
mle-exp = "exp.cli:cli"

[tool.setuptools.packages.find]
include = ["mle*", "exp*"]
Expand Down
Loading