From 44af9a95038a2345017962b7f2f9624ad37d2dcb Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 18 Mar 2026 11:53:07 -0700 Subject: [PATCH 1/3] feat(datasets): add --sql and --query-id to datasets create --- src/command.rs | 15 +++++--- src/datasets.rs | 93 ++++++++++++++++++++++++++++++++++++++++++++++++- src/main.rs | 10 ++++-- 3 files changed, 111 insertions(+), 7 deletions(-) diff --git a/src/command.rs b/src/command.rs index c3931cf..b61b212 100644 --- a/src/command.rs +++ b/src/command.rs @@ -183,7 +183,7 @@ pub enum DatasetsCommands { format: String, }, - /// Create a new dataset from a file, piped stdin, or a pre-existing upload ID + /// Create a new dataset from a file, piped stdin, upload ID, or SQL query Create { /// Dataset label (derived from filename if omitted) #[arg(long)] @@ -194,18 +194,25 @@ pub enum DatasetsCommands { table_name: Option, /// Path to a file to upload (omit to read from stdin) - #[arg(long, conflicts_with = "upload_id")] + #[arg(long, conflicts_with_all = ["upload_id", "sql"])] file: Option, /// Skip upload and use a pre-existing upload ID directly - #[arg(long, conflicts_with = "file")] + #[arg(long, conflicts_with_all = ["file", "sql"])] upload_id: Option, /// Source format when using --upload-id (csv, json, parquet) #[arg(long, default_value = "csv", value_parser = ["csv", "json", "parquet"], requires = "upload_id")] format: String, - }, + /// SQL query to create the dataset from + #[arg(long, conflicts_with_all = ["file", "upload_id", "query_id"])] + sql: Option, + + /// Saved query ID to create the dataset from + #[arg(long, conflicts_with_all = ["file", "upload_id", "sql"])] + query_id: Option, + }, } diff --git a/src/datasets.rs b/src/datasets.rs index 970de1c..279f890 100644 --- a/src/datasets.rs +++ b/src/datasets.rs @@ -299,7 +299,6 @@ pub fn create( if let Some(tn) = table_name { body["table_name"] = json!(tn); } - let url = format!("{}/datasets", profile_config.api_url); let resp = match client @@ -347,6 +346,98 @@ pub fn create( println!("full_name: datasets.main.{}", dataset.table_name); } +fn create_from_source( + workspace_id: &str, + source: serde_json::Value, + label: Option<&str>, + table_name: Option<&str>, + label_required_hint: &str, +) { + let profile_config = match config::load("default") { + Ok(c) => c, + Err(e) => { + eprintln!("{e}"); + std::process::exit(1); + } + }; + + let api_key = match &profile_config.api_key { + Some(key) if key != "PLACEHOLDER" => key.clone(), + _ => { + eprintln!("error: not authenticated. Run 'hotdata auth login' to log in."); + std::process::exit(1); + } + }; + + let label = match label { + Some(l) => l, + None => { + eprintln!("error: --label is required when using {label_required_hint}"); + std::process::exit(1); + } + }; + + let mut body = json!({ "label": label, "source": source }); + if let Some(tn) = table_name { + body["table_name"] = json!(tn); + } + + let url = format!("{}/datasets", profile_config.api_url); + let client = reqwest::blocking::Client::new(); + + let resp = match client + .post(&url) + .header("Authorization", format!("Bearer {api_key}")) + .header("X-Workspace-Id", workspace_id) + .json(&body) + .send() + { + Ok(r) => r, + Err(e) => { + eprintln!("error connecting to API: {e}"); + std::process::exit(1); + } + }; + + if !resp.status().is_success() { + use crossterm::style::Stylize; + eprintln!("{}", crate::util::api_error(resp.text().unwrap_or_default()).red()); + std::process::exit(1); + } + + let dataset: CreateResponse = match resp.json() { + Ok(v) => v, + Err(e) => { + eprintln!("error parsing response: {e}"); + std::process::exit(1); + } + }; + + use crossterm::style::Stylize; + println!("{}", "Dataset created".green()); + println!("id: {}", dataset.id); + println!("label: {}", dataset.label); + println!("full_name: datasets.main.{}", dataset.table_name); +} + +pub fn create_from_query( + workspace_id: &str, + sql: &str, + label: Option<&str>, + table_name: Option<&str>, +) { + create_from_source(workspace_id, json!({ "sql": sql }), label, table_name, "--sql"); +} + +pub fn create_from_saved_query( + workspace_id: &str, + query_id: &str, + label: Option<&str>, + table_name: Option<&str>, +) { + create_from_source(workspace_id, json!({ "saved_query_id": query_id }), label, table_name, "--query-id"); +} + pub fn list(workspace_id: &str, limit: Option, offset: Option, format: &str) { let profile_config = match config::load("default") { Ok(c) => c, diff --git a/src/main.rs b/src/main.rs index e93a070..49ee036 100644 --- a/src/main.rs +++ b/src/main.rs @@ -71,8 +71,14 @@ fn main() { Some(DatasetsCommands::List { limit, offset, format }) => { datasets::list(&workspace_id, limit, offset, &format) } - Some(DatasetsCommands::Create { label, table_name, file, upload_id, format }) => { - datasets::create(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref(), upload_id.as_deref(), &format) + Some(DatasetsCommands::Create { label, table_name, file, upload_id, format, sql, query_id }) => { + if let Some(sql) = sql { + datasets::create_from_query(&workspace_id, &sql, label.as_deref(), table_name.as_deref()) + } else if let Some(query_id) = query_id { + datasets::create_from_saved_query(&workspace_id, &query_id, label.as_deref(), table_name.as_deref()) + } else { + datasets::create(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref(), upload_id.as_deref(), &format) + } } None => { use clap::CommandFactory; From 30e82172af6293d895f6ccf484a2e8b454486473 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 18 Mar 2026 13:00:41 -0700 Subject: [PATCH 2/3] docs(skill): add --sql and --query-id to dataset create docs --- skills/hotdata-cli/SKILL.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/skills/hotdata-cli/SKILL.md b/skills/hotdata-cli/SKILL.md index 251e505..5c6e438 100644 --- a/skills/hotdata-cli/SKILL.md +++ b/skills/hotdata-cli/SKILL.md @@ -131,10 +131,15 @@ hotdata datasets [--workspace-id ] [--format table|js #### Create a dataset ``` hotdata datasets create --label "My Dataset" --file data.csv [--table-name my_dataset] [--workspace-id ] +hotdata datasets create --label "My Dataset" --sql "SELECT * FROM ..." [--table-name my_dataset] [--workspace-id ] +hotdata datasets create --label "My Dataset" --query-id [--table-name my_dataset] [--workspace-id ] ``` - `--file` uploads a local file. Omit to pipe data via stdin: `cat data.csv | hotdata datasets create --label "My Dataset"` +- `--sql` creates a dataset from a SQL query result. +- `--query-id` creates a dataset from a previously saved query. +- `--file`, `--sql`, and `--query-id` are mutually exclusive. - Format is auto-detected from file extension (`.csv`, `.json`, `.parquet`) or file content. -- `--label` is optional when `--file` is provided — defaults to the filename without extension. +- `--label` is optional when `--file` is provided — defaults to the filename without extension. Required for `--sql` and `--query-id`. - `--table-name` is optional — derived from the label if omitted. #### Querying datasets From ccd32bf73e02faeed4939de45bbd9f98099153e6 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 18 Mar 2026 14:25:51 -0700 Subject: [PATCH 3/3] refactor(datasets): unify create paths with shared create_dataset helper --- src/datasets.rs | 193 +++++++++++++++++++++--------------------------- src/main.rs | 2 +- 2 files changed, 85 insertions(+), 110 deletions(-) diff --git a/src/datasets.rs b/src/datasets.rs index 279f890..23d146b 100644 --- a/src/datasets.rs +++ b/src/datasets.rs @@ -220,13 +220,12 @@ fn upload_from_stdin( (id, ft.format) } -pub fn create( +fn create_dataset( workspace_id: &str, - label: Option<&str>, + label: &str, table_name: Option<&str>, - file: Option<&str>, - upload_id: Option<&str>, - source_format: &str, + source: serde_json::Value, + on_failure: Option>, ) { let profile_config = match config::load("default") { Ok(c) => c, @@ -244,62 +243,13 @@ pub fn create( } }; - let label_derived; - let label: &str = match label { - Some(l) => l, - None => match file { - Some(path) => { - label_derived = Path::new(path) - .file_stem() - .and_then(|s| s.to_str()) - .unwrap_or("dataset") - .to_string(); - &label_derived - } - None => { - if upload_id.is_some() { - eprintln!("error: no label provided. Use --label to name the dataset."); - std::process::exit(1); - } - match stdin_redirect_filename() { - Some(name) => { - label_derived = name; - &label_derived - } - None => { - eprintln!("error: no label provided. Use --label to name the dataset."); - std::process::exit(1); - } - } - } - }, - }; - - let client = reqwest::blocking::Client::new(); - - let (upload_id, format, upload_id_was_uploaded): (String, &str, bool) = if let Some(id) = upload_id { - (id.to_string(), source_format, false) - } else { - let (id, fmt) = match file { - Some(path) => upload_from_file(&client, &api_key, workspace_id, &profile_config.api_url, path), - None => { - use std::io::IsTerminal; - if std::io::stdin().is_terminal() { - eprintln!("error: no input data. Use --file , --upload-id , or pipe data via stdin."); - std::process::exit(1); - } - upload_from_stdin(&client, &api_key, workspace_id, &profile_config.api_url) - } - }; - (id, fmt, true) - }; - - let source = json!({ "upload_id": upload_id, "format": format }); let mut body = json!({ "label": label, "source": source }); if let Some(tn) = table_name { body["table_name"] = json!(tn); } + let url = format!("{}/datasets", profile_config.api_url); + let client = reqwest::blocking::Client::new(); let resp = match client .post(&url) @@ -318,15 +268,8 @@ pub fn create( if !resp.status().is_success() { use crossterm::style::Stylize; eprintln!("{}", crate::util::api_error(resp.text().unwrap_or_default()).red()); - // Only show the resume hint when the upload_id came from a fresh upload - if upload_id_was_uploaded { - eprintln!( - "{}", - format!( - "Resume dataset creation without re-uploading by passing --upload-id {upload_id}" - ) - .yellow() - ); + if let Some(f) = on_failure { + f(); } std::process::exit(1); } @@ -346,12 +289,13 @@ pub fn create( println!("full_name: datasets.main.{}", dataset.table_name); } -fn create_from_source( +pub fn create_from_upload( workspace_id: &str, - source: serde_json::Value, label: Option<&str>, table_name: Option<&str>, - label_required_hint: &str, + file: Option<&str>, + upload_id: Option<&str>, + source_format: &str, ) { let profile_config = match config::load("default") { Ok(c) => c, @@ -369,55 +313,72 @@ fn create_from_source( } }; - let label = match label { + let label_derived; + let label: &str = match label { Some(l) => l, - None => { - eprintln!("error: --label is required when using {label_required_hint}"); - std::process::exit(1); - } + None => match file { + Some(path) => { + label_derived = Path::new(path) + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("dataset") + .to_string(); + &label_derived + } + None => { + if upload_id.is_some() { + eprintln!("error: no label provided. Use --label to name the dataset."); + std::process::exit(1); + } + match stdin_redirect_filename() { + Some(name) => { + label_derived = name; + &label_derived + } + None => { + eprintln!("error: no label provided. Use --label to name the dataset."); + std::process::exit(1); + } + } + } + }, }; - let mut body = json!({ "label": label, "source": source }); - if let Some(tn) = table_name { - body["table_name"] = json!(tn); - } - - let url = format!("{}/datasets", profile_config.api_url); let client = reqwest::blocking::Client::new(); - let resp = match client - .post(&url) - .header("Authorization", format!("Bearer {api_key}")) - .header("X-Workspace-Id", workspace_id) - .json(&body) - .send() - { - Ok(r) => r, - Err(e) => { - eprintln!("error connecting to API: {e}"); - std::process::exit(1); - } + let (upload_id, format, upload_id_was_uploaded): (String, &str, bool) = if let Some(id) = upload_id { + (id.to_string(), source_format, false) + } else { + let (id, fmt) = match file { + Some(path) => upload_from_file(&client, &api_key, workspace_id, &profile_config.api_url, path), + None => { + use std::io::IsTerminal; + if std::io::stdin().is_terminal() { + eprintln!("error: no input data. Use --file , --upload-id , or pipe data via stdin."); + std::process::exit(1); + } + upload_from_stdin(&client, &api_key, workspace_id, &profile_config.api_url) + } + }; + (id, fmt, true) }; - if !resp.status().is_success() { - use crossterm::style::Stylize; - eprintln!("{}", crate::util::api_error(resp.text().unwrap_or_default()).red()); - std::process::exit(1); - } + let source = json!({ "upload_id": upload_id, "format": format }); - let dataset: CreateResponse = match resp.json() { - Ok(v) => v, - Err(e) => { - eprintln!("error parsing response: {e}"); - std::process::exit(1); - } + let on_failure: Option> = if upload_id_was_uploaded { + let uid = upload_id.clone(); + Some(Box::new(move || { + use crossterm::style::Stylize; + eprintln!( + "{}", + format!("Resume dataset creation without re-uploading by passing --upload-id {uid}").yellow() + ); + })) + } else { + None }; - use crossterm::style::Stylize; - println!("{}", "Dataset created".green()); - println!("id: {}", dataset.id); - println!("label: {}", dataset.label); - println!("full_name: datasets.main.{}", dataset.table_name); + create_dataset(workspace_id, label, table_name, source, on_failure); } pub fn create_from_query( @@ -426,7 +387,14 @@ pub fn create_from_query( label: Option<&str>, table_name: Option<&str>, ) { - create_from_source(workspace_id, json!({ "sql": sql }), label, table_name, "--sql"); + let label = match label { + Some(l) => l, + None => { + eprintln!("error: --label is required when using --sql"); + std::process::exit(1); + } + }; + create_dataset(workspace_id, label, table_name, json!({ "sql": sql }), None); } pub fn create_from_saved_query( @@ -435,7 +403,14 @@ pub fn create_from_saved_query( label: Option<&str>, table_name: Option<&str>, ) { - create_from_source(workspace_id, json!({ "saved_query_id": query_id }), label, table_name, "--query-id"); + let label = match label { + Some(l) => l, + None => { + eprintln!("error: --label is required when using --query-id"); + std::process::exit(1); + } + }; + create_dataset(workspace_id, label, table_name, json!({ "saved_query_id": query_id }), None); } pub fn list(workspace_id: &str, limit: Option, offset: Option, format: &str) { diff --git a/src/main.rs b/src/main.rs index 49ee036..bebd1bb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -77,7 +77,7 @@ fn main() { } else if let Some(query_id) = query_id { datasets::create_from_saved_query(&workspace_id, &query_id, label.as_deref(), table_name.as_deref()) } else { - datasets::create(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref(), upload_id.as_deref(), &format) + datasets::create_from_upload(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref(), upload_id.as_deref(), &format) } } None => {