diff --git a/skills/hotdata-cli/SKILL.md b/skills/hotdata-cli/SKILL.md
index 251e505..5c6e438 100644
--- a/skills/hotdata-cli/SKILL.md
+++ b/skills/hotdata-cli/SKILL.md
@@ -131,10 +131,15 @@ hotdata datasets [--workspace-id <id>] [--format table|json]
 #### Create a dataset
 ```
 hotdata datasets create --label "My Dataset" --file data.csv [--table-name my_dataset] [--workspace-id <id>]
+hotdata datasets create --label "My Dataset" --sql "SELECT * FROM ..." [--table-name my_dataset] [--workspace-id <id>]
+hotdata datasets create --label "My Dataset" --query-id <id> [--table-name my_dataset] [--workspace-id <id>]
 ```
 - `--file` uploads a local file. Omit to pipe data via stdin: `cat data.csv | hotdata datasets create --label "My Dataset"`
+- `--sql` creates a dataset from a SQL query result.
+- `--query-id` creates a dataset from a previously saved query.
+- `--file`, `--sql`, and `--query-id` are mutually exclusive.
 - Format is auto-detected from file extension (`.csv`, `.json`, `.parquet`) or file content.
-- `--label` is optional when `--file` is provided — defaults to the filename without extension.
+- `--label` is optional when `--file` is provided — defaults to the filename without extension. Required for `--sql` and `--query-id`.
 - `--table-name` is optional — derived from the label if omitted.
 
 #### Querying datasets
diff --git a/src/command.rs b/src/command.rs
index c3931cf..b61b212 100644
--- a/src/command.rs
+++ b/src/command.rs
@@ -183,7 +183,7 @@ pub enum DatasetsCommands {
         format: String,
     },
-    /// Create a new dataset from a file, piped stdin, or a pre-existing upload ID
+    /// Create a new dataset from a file, piped stdin, upload ID, or SQL query
     Create {
         /// Dataset label (derived from filename if omitted)
         #[arg(long)]
         label: Option<String>,
@@ -194,18 +194,25 @@ pub enum DatasetsCommands {
         table_name: Option<String>,
 
         /// Path to a file to upload (omit to read from stdin)
-        #[arg(long, conflicts_with = "upload_id")]
+        #[arg(long, conflicts_with_all = ["upload_id", "sql"])]
         file: Option<String>,
 
         /// Skip upload and use a pre-existing upload ID directly
-        #[arg(long, conflicts_with = "file")]
+        #[arg(long, conflicts_with_all = ["file", "sql"])]
         upload_id: Option<String>,
 
         /// Source format when using --upload-id (csv, json, parquet)
         #[arg(long, default_value = "csv", value_parser = ["csv", "json", "parquet"], requires = "upload_id")]
         format: String,
-    },
+
+        /// SQL query to create the dataset from
+        #[arg(long, conflicts_with_all = ["file", "upload_id", "query_id"])]
+        sql: Option<String>,
+
+        /// Saved query ID to create the dataset from
+        #[arg(long, conflicts_with_all = ["file", "upload_id", "sql"])]
+        query_id: Option<String>,
+    },
 }
 
diff --git a/src/datasets.rs b/src/datasets.rs
index 970de1c..23d146b 100644
--- a/src/datasets.rs
+++ b/src/datasets.rs
@@ -220,7 +220,76 @@ fn upload_from_stdin(
     (id, ft.format)
 }
 
-pub fn create(
+fn create_dataset(
+    workspace_id: &str,
+    label: &str,
+    table_name: Option<&str>,
+    source: serde_json::Value,
+    on_failure: Option<Box<dyn FnOnce()>>,
+) {
+    let profile_config = match config::load("default") {
+        Ok(c) => c,
+        Err(e) => {
+            eprintln!("{e}");
+            std::process::exit(1);
+        }
+    };
+
+    let api_key = match &profile_config.api_key {
+        Some(key) if key != "PLACEHOLDER" => key.clone(),
+        _ => {
+            eprintln!("error: not authenticated. Run 'hotdata auth login' to log in.");
+            std::process::exit(1);
+        }
+    };
+
+    let mut body = json!({ "label": label, "source": source });
+    if let Some(tn) = table_name {
+        body["table_name"] = json!(tn);
+    }
+
+    let url = format!("{}/datasets", profile_config.api_url);
+    let client = reqwest::blocking::Client::new();
+
+    let resp = match client
+        .post(&url)
+        .header("Authorization", format!("Bearer {api_key}"))
+        .header("X-Workspace-Id", workspace_id)
+        .json(&body)
+        .send()
+    {
+        Ok(r) => r,
+        Err(e) => {
+            eprintln!("error connecting to API: {e}");
+            std::process::exit(1);
+        }
+    };
+
+    if !resp.status().is_success() {
+        use crossterm::style::Stylize;
+        eprintln!("{}", crate::util::api_error(resp.text().unwrap_or_default()).red());
+        if let Some(f) = on_failure {
+            f();
+        }
+        std::process::exit(1);
+    }
+
+    let dataset: CreateResponse = match resp.json() {
+        Ok(v) => v,
+        Err(e) => {
+            eprintln!("error parsing response: {e}");
+            std::process::exit(1);
+        }
+    };
+
+    use crossterm::style::Stylize;
+    println!("{}", "Dataset created".green());
+    println!("id: {}", dataset.id);
+    println!("label: {}", dataset.label);
+    println!("full_name: datasets.main.{}", dataset.table_name);
+}
+
+pub fn create_from_upload(
     workspace_id: &str,
     label: Option<&str>,
     table_name: Option<&str>,
@@ -295,56 +364,53 @@
     };
 
     let source = json!({ "upload_id": upload_id, "format": format });
-    let mut body = json!({ "label": label, "source": source });
-    if let Some(tn) = table_name {
-        body["table_name"] = json!(tn);
-    }
-
-    let url = format!("{}/datasets", profile_config.api_url);
-
-    let resp = match client
-        .post(&url)
-        .header("Authorization", format!("Bearer {api_key}"))
-        .header("X-Workspace-Id", workspace_id)
-        .json(&body)
-        .send()
-    {
-        Ok(r) => r,
-        Err(e) => {
-            eprintln!("error connecting to API: {e}");
-            std::process::exit(1);
-        }
-    };
-    if !resp.status().is_success() {
-        use crossterm::style::Stylize;
-        eprintln!("{}", crate::util::api_error(resp.text().unwrap_or_default()).red());
-        // Only show the resume hint when the upload_id came from a fresh upload
-        if upload_id_was_uploaded {
+    let on_failure: Option<Box<dyn FnOnce()>> = if upload_id_was_uploaded {
+        let uid = upload_id.clone();
+        Some(Box::new(move || {
+            use crossterm::style::Stylize;
             eprintln!(
                 "{}",
-                format!(
-                    "Resume dataset creation without re-uploading by passing --upload-id {upload_id}"
-                )
-                .yellow()
+                format!("Resume dataset creation without re-uploading by passing --upload-id {uid}").yellow()
             );
-        }
-        std::process::exit(1);
-    }
+        }))
+    } else {
+        None
+    };
 
-    let dataset: CreateResponse = match resp.json() {
-        Ok(v) => v,
-        Err(e) => {
-            eprintln!("error parsing response: {e}");
+    create_dataset(workspace_id, label, table_name, source, on_failure);
+}
+
+pub fn create_from_query(
+    workspace_id: &str,
+    sql: &str,
+    label: Option<&str>,
+    table_name: Option<&str>,
+) {
+    let label = match label {
+        Some(l) => l,
+        None => {
+            eprintln!("error: --label is required when using --sql");
             std::process::exit(1);
         }
     };
+    create_dataset(workspace_id, label, table_name, json!({ "sql": sql }), None);
+}
 
-    use crossterm::style::Stylize;
-    println!("{}", "Dataset created".green());
-    println!("id: {}", dataset.id);
-    println!("label: {}", dataset.label);
-    println!("full_name: datasets.main.{}", dataset.table_name);
+pub fn create_from_saved_query(
+    workspace_id: &str,
+    query_id: &str,
+    label: Option<&str>,
+    table_name: Option<&str>,
+) {
+    let label = match label {
+        Some(l) => l,
+        None => {
+            eprintln!("error: --label is required when using --query-id");
+            std::process::exit(1);
+        }
+    };
+    create_dataset(workspace_id, label, table_name, json!({ "saved_query_id": query_id }), None);
 }
 
 pub fn list(workspace_id: &str, limit: Option<u32>, offset: Option<u32>, format: &str) {
diff --git a/src/main.rs b/src/main.rs
index e93a070..bebd1bb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -71,8 +71,14 @@ fn main() {
         Some(DatasetsCommands::List { limit, offset, format }) => {
             datasets::list(&workspace_id, limit, offset, &format)
         }
-        Some(DatasetsCommands::Create { label, table_name, file, upload_id, format }) => {
-            datasets::create(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref(), upload_id.as_deref(), &format)
+        Some(DatasetsCommands::Create { label, table_name, file, upload_id, format, sql, query_id }) => {
+            if let Some(sql) = sql {
+                datasets::create_from_query(&workspace_id, &sql, label.as_deref(), table_name.as_deref())
+            } else if let Some(query_id) = query_id {
+                datasets::create_from_saved_query(&workspace_id, &query_id, label.as_deref(), table_name.as_deref())
+            } else {
+                datasets::create_from_upload(&workspace_id, label.as_deref(), table_name.as_deref(), file.as_deref(), upload_id.as_deref(), &format)
+            }
         }
         None => {
             use clap::CommandFactory;