Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion skills/hotdata-cli/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,15 @@ hotdata datasets <dataset_id> [--workspace-id <workspace_id>] [--format table|js
#### Create a dataset
```
hotdata datasets create --label "My Dataset" --file data.csv [--table-name my_dataset] [--workspace-id <workspace_id>]
hotdata datasets create --label "My Dataset" --sql "SELECT * FROM ..." [--table-name my_dataset] [--workspace-id <workspace_id>]
hotdata datasets create --label "My Dataset" --query-id <saved_query_id> [--table-name my_dataset] [--workspace-id <workspace_id>]
```
- `--file` uploads a local file. Omit to pipe data via stdin: `cat data.csv | hotdata datasets create --label "My Dataset"`
- `--sql` creates a dataset from a SQL query result.
- `--query-id` creates a dataset from a previously saved query.
- `--file`, `--sql`, and `--query-id` are mutually exclusive.
- Format is auto-detected from file extension (`.csv`, `.json`, `.parquet`) or file content.
- `--label` is optional when `--file` is provided — defaults to the filename without extension. Required for `--sql` and `--query-id`.
- `--table-name` is optional — derived from the label if omitted.

#### Querying datasets
Expand Down
15 changes: 11 additions & 4 deletions src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ pub enum DatasetsCommands {
format: String,
},

/// Create a new dataset from a file, piped stdin, upload ID, SQL query, or saved query
Create {
/// Dataset label (derived from filename if omitted)
#[arg(long)]
Expand All @@ -194,18 +194,25 @@ pub enum DatasetsCommands {
table_name: Option<String>,

/// Path to a file to upload (omit to read from stdin)
#[arg(long, conflicts_with = "upload_id")]
#[arg(long, conflicts_with_all = ["upload_id", "sql"])]
file: Option<String>,

/// Skip upload and use a pre-existing upload ID directly
#[arg(long, conflicts_with = "file")]
#[arg(long, conflicts_with_all = ["file", "sql"])]
upload_id: Option<String>,

/// Source format when using --upload-id (csv, json, parquet)
#[arg(long, default_value = "csv", value_parser = ["csv", "json", "parquet"], requires = "upload_id")]
format: String,
},

/// SQL query to create the dataset from
#[arg(long, conflicts_with_all = ["file", "upload_id", "query_id"])]
sql: Option<String>,

/// Saved query ID to create the dataset from
#[arg(long, conflicts_with_all = ["file", "upload_id", "sql"])]
query_id: Option<String>,
},
}


Expand Down
150 changes: 108 additions & 42 deletions src/datasets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,76 @@ fn upload_from_stdin(
(id, ft.format)
}

pub fn create(
/// Shared backend for all `datasets create` variants: POSTs a dataset-creation
/// request built from `source` to `{api_url}/datasets` and prints the result.
///
/// * `workspace_id` — sent as the `X-Workspace-Id` header.
/// * `label` / `table_name` — dataset metadata; `table_name` is omitted from
///   the request body when `None`.
/// * `source` — JSON describing where the data comes from (upload, SQL, …).
/// * `on_failure` — optional hook run after printing an API error, before
///   exiting (used by callers to print recovery hints).
///
/// Exits the process with status 1 on any configuration, network, API, or
/// response-parsing failure.
fn create_dataset(
    workspace_id: &str,
    label: &str,
    table_name: Option<&str>,
    source: serde_json::Value,
    on_failure: Option<Box<dyn FnOnce()>>,
) {
    use crossterm::style::Stylize;

    // Credentials come from the default profile; surface load errors verbatim.
    let cfg = config::load("default").unwrap_or_else(|e| {
        eprintln!("{e}");
        std::process::exit(1);
    });

    // A missing or placeholder key means `auth login` was never completed.
    let api_key = cfg
        .api_key
        .as_deref()
        .filter(|key| *key != "PLACEHOLDER")
        .map(str::to_owned)
        .unwrap_or_else(|| {
            eprintln!("error: not authenticated. Run 'hotdata auth login' to log in.");
            std::process::exit(1);
        });

    let mut payload = json!({ "label": label, "source": source });
    if let Some(name) = table_name {
        payload["table_name"] = json!(name);
    }

    let endpoint = format!("{}/datasets", cfg.api_url);
    let response = reqwest::blocking::Client::new()
        .post(&endpoint)
        .header("Authorization", format!("Bearer {api_key}"))
        .header("X-Workspace-Id", workspace_id)
        .json(&payload)
        .send()
        .unwrap_or_else(|e| {
            eprintln!("error connecting to API: {e}");
            std::process::exit(1);
        });

    if !response.status().is_success() {
        eprintln!("{}", crate::util::api_error(response.text().unwrap_or_default()).red());
        // Let the caller print recovery hints (e.g. how to resume) before exiting.
        if let Some(hook) = on_failure {
            hook();
        }
        std::process::exit(1);
    }

    let dataset: CreateResponse = response.json().unwrap_or_else(|e| {
        eprintln!("error parsing response: {e}");
        std::process::exit(1);
    });

    println!("{}", "Dataset created".green());
    println!("id: {}", dataset.id);
    println!("label: {}", dataset.label);
    println!("full_name: datasets.main.{}", dataset.table_name);
}

pub fn create_from_upload(
workspace_id: &str,
label: Option<&str>,
table_name: Option<&str>,
Expand Down Expand Up @@ -295,56 +364,53 @@ pub fn create(
};

let source = json!({ "upload_id": upload_id, "format": format });
let mut body = json!({ "label": label, "source": source });
if let Some(tn) = table_name {
body["table_name"] = json!(tn);
}

let url = format!("{}/datasets", profile_config.api_url);

let resp = match client
.post(&url)
.header("Authorization", format!("Bearer {api_key}"))
.header("X-Workspace-Id", workspace_id)
.json(&body)
.send()
{
Ok(r) => r,
Err(e) => {
eprintln!("error connecting to API: {e}");
std::process::exit(1);
}
};

if !resp.status().is_success() {
use crossterm::style::Stylize;
eprintln!("{}", crate::util::api_error(resp.text().unwrap_or_default()).red());
// Only show the resume hint when the upload_id came from a fresh upload
if upload_id_was_uploaded {
let on_failure: Option<Box<dyn FnOnce()>> = if upload_id_was_uploaded {
let uid = upload_id.clone();
Some(Box::new(move || {
use crossterm::style::Stylize;
eprintln!(
"{}",
format!(
"Resume dataset creation without re-uploading by passing --upload-id {upload_id}"
)
.yellow()
format!("Resume dataset creation without re-uploading by passing --upload-id {uid}").yellow()
);
}
std::process::exit(1);
}
}))
} else {
None
};

let dataset: CreateResponse = match resp.json() {
Ok(v) => v,
Err(e) => {
eprintln!("error parsing response: {e}");
create_dataset(workspace_id, label, table_name, source, on_failure);
}

/// Create a dataset from an ad-hoc SQL query (`--sql`).
///
/// Unlike the file path, there is no filename to derive a label from, so
/// `label` is mandatory here; exits with status 1 when it is missing.
pub fn create_from_query(
    workspace_id: &str,
    sql: &str,
    label: Option<&str>,
    table_name: Option<&str>,
) {
    let Some(label) = label else {
        eprintln!("error: --label is required when using --sql");
        std::process::exit(1);
    };
    create_dataset(workspace_id, label, table_name, json!({ "sql": sql }), None);
}

use crossterm::style::Stylize;
println!("{}", "Dataset created".green());
println!("id: {}", dataset.id);
println!("label: {}", dataset.label);
println!("full_name: datasets.main.{}", dataset.table_name);
/// Create a dataset from a previously saved query (`--query-id`).
///
/// `label` is mandatory for this source — there is no filename to fall back
/// on; exits with status 1 when it is missing.
pub fn create_from_saved_query(
    workspace_id: &str,
    query_id: &str,
    label: Option<&str>,
    table_name: Option<&str>,
) {
    let label = label.unwrap_or_else(|| {
        eprintln!("error: --label is required when using --query-id");
        std::process::exit(1);
    });
    create_dataset(
        workspace_id,
        label,
        table_name,
        json!({ "saved_query_id": query_id }),
        None,
    );
}

pub fn list(workspace_id: &str, limit: Option<u32>, offset: Option<u32>, format: &str) {
Expand Down
10 changes: 8 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,14 @@ fn main() {
Some(DatasetsCommands::List { limit, offset, format }) => {
datasets::list(&workspace_id, limit, offset, &format)
}
Some(DatasetsCommands::Create { label, table_name, file, upload_id, format }) => {
datasets::create(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref(), upload_id.as_deref(), &format)
Some(DatasetsCommands::Create { label, table_name, file, upload_id, format, sql, query_id }) => {
if let Some(sql) = sql {
datasets::create_from_query(&workspace_id, &sql, label.as_deref(), table_name.as_deref())
} else if let Some(query_id) = query_id {
datasets::create_from_saved_query(&workspace_id, &query_id, label.as_deref(), table_name.as_deref())
} else {
datasets::create_from_upload(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref(), upload_id.as_deref(), &format)
}
}
None => {
use clap::CommandFactory;
Expand Down