Skip to content

Commit 21ffdd4

Browse files
authored
Add inline data explorers to Ark (#1124)
This change updates the R kernel to supply inline data explorers in notebook mode (which encompasses both Quarto documents and Positron notebooks). <img width="634" height="316" alt="image" src="https://github.com/user-attachments/assets/fdaa8f42-2fcf-4562-8996-c2bc91214e6d" /> The approach closely mirrors how the Python kernel provides the same feature (see posit-dev/positron#11732): when a data frame-like object is printed and we're in a Positron notebook, we open a new data explorer comm and send it to the front end with a special MIME type.
1 parent de027c5 commit 21ffdd4

9 files changed

Lines changed: 366 additions & 17 deletions

File tree

crates/amalthea/src/comm/data_explorer_comm.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// @generated
22

33
/*---------------------------------------------------------------------------------------------
4-
* Copyright (C) 2024-2025 Posit Software, PBC. All rights reserved.
4+
* Copyright (C) 2024-2026 Posit Software, PBC. All rights reserved.
55
*--------------------------------------------------------------------------------------------*/
66

77
//
@@ -60,6 +60,13 @@ pub struct FilterResult {
6060
pub had_errors: Option<bool>
6161
}
6262

63+
/// Result of setting import options
64+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
65+
pub struct SetDatasetImportOptionsResult {
66+
/// An error message if setting the options failed
67+
pub error_message: Option<String>
68+
}
69+
6370
/// The current backend state for the data explorer
6471
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
6572
pub struct BackendState {
@@ -703,6 +710,15 @@ pub struct ColumnSelection {
703710
pub spec: ArraySelection
704711
}
705712

713+
/// Import options for file-based data sources. Currently supports options
714+
/// for delimited text files (CSV, TSV).
715+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
716+
pub struct DatasetImportOptions {
717+
/// Whether the first row contains column headers (for delimited text
718+
/// files)
719+
pub has_header_row: Option<bool>
720+
}
721+
706722
/// Possible values for SortOrder in SearchSchema
707723
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, strum_macros::Display, strum_macros::EnumString)]
708724
pub enum SearchSchemaSortOrder {
@@ -1193,6 +1209,13 @@ pub struct GetColumnProfilesParams {
11931209
pub format_options: FormatOptions,
11941210
}
11951211

1212+
/// Parameters for the SetDatasetImportOptions method.
1213+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
1214+
pub struct SetDatasetImportOptionsParams {
1215+
/// Import options to apply
1216+
pub options: DatasetImportOptions,
1217+
}
1218+
11961219
/// Parameters for the ReturnColumnProfiles method.
11971220
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
11981221
pub struct ReturnColumnProfilesParams {
@@ -1289,6 +1312,23 @@ pub enum DataExplorerBackendRequest {
12891312
#[serde(rename = "get_column_profiles")]
12901313
GetColumnProfiles(GetColumnProfilesParams),
12911314

1315+
/// Set import options for file-based data sources
1316+
///
1317+
/// Set import options for file-based data sources (like CSV files) and
1318+
/// reimport the data. This method is primarily used by file-based
1319+
/// backends like DuckDB.
1320+
#[serde(rename = "set_dataset_import_options")]
1321+
SetDatasetImportOptions(SetDatasetImportOptionsParams),
1322+
1323+
/// Open a full data explorer for the same data
1324+
///
1325+
/// Creates a new, independent data explorer comm for the same underlying
1326+
/// data. The new comm has its own state (filters, sorts). Used when
1327+
/// promoting an inline notebook data explorer to a full data explorer
1328+
/// panel.
1329+
#[serde(rename = "open_data_explorer")]
1330+
OpenDataExplorer,
1331+
12921332
/// Get the state
12931333
///
12941334
/// Request the current backend state (table metadata, explorer state, and
@@ -1337,6 +1377,12 @@ pub enum DataExplorerBackendReply {
13371377
/// Reply for the get_column_profiles method (no result)
13381378
GetColumnProfilesReply(),
13391379

1380+
/// Result of setting import options
1381+
SetDatasetImportOptionsReply(SetDatasetImportOptionsResult),
1382+
1383+
/// Reply for the open_data_explorer method (no result)
1384+
OpenDataExplorerReply(),
1385+
13401386
/// The current backend state for the data explorer
13411387
GetStateReply(BackendState),
13421388

crates/ark/src/console.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ use harp::utils::r_poke_option;
8282
use harp::utils::r_typeof;
8383
use harp::CONSOLE_THREAD_ID;
8484
use libr::R_BaseNamespace;
85-
use libr::R_GlobalEnv;
8685
use libr::R_ProcessEvents;
8786
use libr::R_RunPendingFinalizers;
8887
use libr::Rf_ScalarInteger;

crates/ark/src/console/console_integration.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
//! Help, LSP, UI comm, and frontend method integration for the R console.
99
1010
use super::*;
11+
use crate::data_explorer::r_data_explorer::DataExplorerMode;
12+
use crate::data_explorer::r_data_explorer::InlineDataExplorerData;
13+
use crate::data_explorer::r_data_explorer::InlineDataExplorerShape;
14+
use crate::data_explorer::r_data_explorer::RDataExplorer;
15+
use crate::data_explorer::r_data_explorer::DATA_EXPLORER_COMM_NAME;
1116

1217
/// UI comm integration.
1318
impl Console {
@@ -164,6 +169,55 @@ impl Console {
164169
}
165170
}
166171

172+
/// Inline data explorer integration.
173+
impl Console {
174+
/// Open an inline data explorer for a data frame value and return the MIME
175+
/// type payload to include in the execute result.
176+
pub(super) fn open_inline_data_explorer(
177+
&mut self,
178+
value: SEXP,
179+
) -> anyhow::Result<serde_json::Value> {
180+
let data = RObject::new(value);
181+
182+
// `source` is the value's first R class (e.g. "tbl_df", "data.table",
183+
// "data.frame"), following the Python kernel convention where `source`
184+
// is the library name ("pandas", "polars").
185+
let source = data
186+
.class()
187+
.ok()
188+
.flatten()
189+
.and_then(|classes| classes.into_iter().next())
190+
.unwrap_or_else(|| String::from("data.frame"));
191+
192+
// `title` is the variable name when available, falling back to
193+
// `source`. For inline explorers we don't have a variable binding, so
194+
// we always use `source` as the title.
195+
let title = source.clone();
196+
197+
let explorer = RDataExplorer::new(title.clone(), data, None, DataExplorerMode::Inline)?;
198+
let shape = &explorer.shape();
199+
let inline_data = InlineDataExplorerData {
200+
version: 1,
201+
comm_id: String::new(), // placeholder, filled after comm_open
202+
shape: InlineDataExplorerShape {
203+
rows: shape.num_rows,
204+
columns: shape.columns.len(),
205+
},
206+
title,
207+
source,
208+
};
209+
210+
let comm_id = self.comm_open_backend(DATA_EXPLORER_COMM_NAME, Box::new(explorer))?;
211+
212+
let inline_data = InlineDataExplorerData {
213+
comm_id,
214+
..inline_data
215+
};
216+
217+
Ok(serde_json::to_value(inline_data)?)
218+
}
219+
}
220+
167221
/// Reference to the UI comm. Returned by `Console::ui_comm()`.
168222
///
169223
/// Existence of this value guarantees the comm is connected.

crates/ark/src/console/console_repl.rs

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//! ReadConsole, WriteConsole, and R frontend callbacks.
1212
1313
use super::*;
14+
use crate::data_explorer::r_data_explorer::POSITRON_DATA_EXPLORER_MIME;
1415
use crate::r_task::QueuedRTask;
1516
use crate::r_task::RTask;
1617

@@ -1131,18 +1132,46 @@ impl Console {
11311132
data.insert("text/plain".to_string(), json!(autoprint));
11321133
}
11331134

1134-
// Include HTML representation of data.frame
1135-
unsafe {
1136-
let value = Rf_findVarInFrame(R_GlobalEnv, r_symbol!(".Last.value"));
1137-
if r_is_data_frame(value) {
1138-
match to_html(value) {
1139-
Ok(html) => {
1140-
data.insert("text/html".to_string(), json!(html));
1135+
// Include HTML representation of data.frame and optionally open an
1136+
// inline data explorer in Positron notebook mode. Only do this when
1137+
// there is visible output (autoprint produced text/plain).
1138+
let Ok(value) = harp::environment::last_value() else {
1139+
return data;
1140+
};
1141+
1142+
// If there is no data, return early
1143+
if data.is_empty() {
1144+
return data;
1145+
}
1146+
1147+
// If this is a data frame, add HTML representation and open inline explorer
1148+
// (only in Positron notebook mode)
1149+
if r_is_data_frame(value.sexp) {
1150+
let value = value.sexp;
1151+
match to_html(value) {
1152+
Ok(html) => {
1153+
data.insert("text/html".to_string(), json!(html));
1154+
},
1155+
Err(err) => {
1156+
log::error!("{err:?}");
1157+
},
1158+
};
1159+
1160+
// The inline data explorer is a Positron-specific feature that
1161+
// requires comm support. Other Jupyter frontends don't understand
1162+
// this MIME type, so we gate on the POSITRON env var to avoid
1163+
// sending it to vanilla Jupyter notebooks.
1164+
if self.session_mode == SessionMode::Notebook &&
1165+
std::env::var("POSITRON").as_deref() == Ok("1")
1166+
{
1167+
match self.open_inline_data_explorer(value) {
1168+
Ok(mime_data) => {
1169+
data.insert(POSITRON_DATA_EXPLORER_MIME.to_string(), mime_data);
11411170
},
11421171
Err(err) => {
1143-
log::error!("{:?}", err);
1172+
log::error!("Failed to open inline data explorer: {err:?}");
11441173
},
1145-
};
1174+
}
11461175
}
11471176
}
11481177

crates/ark/src/data_explorer/r_data_explorer.rs

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,35 @@ use crate::r_task;
100100
use crate::r_task::RTask;
101101
use crate::variables::variable::WorkspaceVariableDisplayType;
102102

103+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
104+
pub enum DataExplorerMode {
105+
Inline,
106+
Full,
107+
}
108+
103109
pub const DATA_EXPLORER_COMM_NAME: &str = "positron.dataExplorer";
110+
pub const POSITRON_DATA_EXPLORER_MIME: &str = "application/vnd.positron.dataExplorer+json";
111+
112+
/// Payload for the `application/vnd.positron.dataExplorer+json` MIME type
113+
/// included in notebook execute results for data frames. This tells Positron's
114+
/// notebook renderer to display an inline data explorer widget.
115+
///
116+
/// Must stay in sync with `ParsedDataExplorerOutput` in Positron's
117+
/// `IPositronNotebookCell.ts`.
118+
#[derive(Clone, Debug, serde::Serialize)]
119+
pub struct InlineDataExplorerData {
120+
pub version: u32,
121+
pub comm_id: String,
122+
pub shape: InlineDataExplorerShape,
123+
pub title: String,
124+
pub source: String,
125+
}
126+
127+
#[derive(Clone, Debug, serde::Serialize)]
128+
pub struct InlineDataExplorerShape {
129+
pub rows: i32,
130+
pub columns: usize,
131+
}
104132

105133
/// A name/value binding pair in an environment.
106134
///
@@ -157,6 +185,10 @@ pub struct RDataExplorer {
157185
/// row indices. This is the set of row indices that are displayed in the
158186
/// data viewer.
159187
view_indices: Option<Vec<i32>>,
188+
189+
/// The display mode for this explorer. `Inline` renders a compact grid
190+
/// in a notebook cell output; `Full` opens the full Data Explorer panel.
191+
explorer_mode: DataExplorerMode,
160192
}
161193

162194
impl std::fmt::Debug for RDataExplorer {
@@ -173,6 +205,7 @@ impl RDataExplorer {
173205
title: String,
174206
data: RObject,
175207
binding: Option<DataObjectEnvInfo>,
208+
explorer_mode: DataExplorerMode,
176209
) -> anyhow::Result<Self> {
177210
let table = Table::new(data);
178211
let shape = Self::get_shape(table.get().clone())?;
@@ -187,9 +220,14 @@ impl RDataExplorer {
187220
sort_keys: vec![],
188221
row_filters: vec![],
189222
col_filters: vec![],
223+
explorer_mode,
190224
})
191225
}
192226

227+
pub(crate) fn shape(&self) -> &DataObjectShape {
228+
&self.shape
229+
}
230+
193231
/// Check the environment bindings for updates to the underlying value
194232
///
195233
/// Returns true if the update was processed; false if the binding has been
@@ -416,13 +454,31 @@ impl RDataExplorer {
416454
DataExplorerBackendRequest::SuggestCodeSyntax => Ok(
417455
DataExplorerBackendReply::SuggestCodeSyntaxReply(self.suggest_code_syntax()),
418456
),
457+
458+
DataExplorerBackendRequest::SetDatasetImportOptions(_) => {
459+
Err(anyhow!("Data Explorer: Not yet supported"))
460+
},
461+
462+
// Promotes an inline data explorer to a full data explorer.
463+
DataExplorerBackendRequest::OpenDataExplorer => {
464+
let explorer = RDataExplorer::new(
465+
self.title.clone(),
466+
self.table.get().clone(),
467+
None,
468+
DataExplorerMode::Full,
469+
)?;
470+
Console::get_mut()
471+
.comm_open_backend(DATA_EXPLORER_COMM_NAME, Box::new(explorer))?;
472+
Ok(DataExplorerBackendReply::OpenDataExplorerReply())
473+
},
419474
}
420475
}
421476
}
422477

423478
impl CommHandler for RDataExplorer {
424479
fn open_metadata(&self) -> serde_json::Value {
425-
serde_json::json!({ "title": self.title })
480+
let inline_only = self.explorer_mode == DataExplorerMode::Inline;
481+
serde_json::json!({ "title": self.title, "inline_only": inline_only })
426482
}
427483

428484
fn handle_msg(&mut self, msg: CommMsg, ctx: &CommHandlerContext) {
@@ -1210,7 +1266,7 @@ pub unsafe extern "C-unwind" fn ps_view_data_frame(
12101266
None
12111267
};
12121268

1213-
let explorer = RDataExplorer::new(title, x, env_info)?;
1269+
let explorer = RDataExplorer::new(title, x, env_info, DataExplorerMode::Full)?;
12141270
Console::get_mut().comm_open_backend(DATA_EXPLORER_COMM_NAME, Box::new(explorer))?;
12151271

12161272
Ok(R_NilValue)

crates/ark/src/variables/r_variables.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ use stdext::spawn;
4242

4343
use crate::console;
4444
use crate::console::Console;
45+
use crate::data_explorer::r_data_explorer::DataExplorerMode;
4546
use crate::data_explorer::r_data_explorer::DataObjectEnvInfo;
4647
use crate::data_explorer::r_data_explorer::RDataExplorer;
4748
use crate::data_explorer::r_data_explorer::DATA_EXPLORER_COMM_NAME;
@@ -357,8 +358,9 @@ impl RVariables {
357358
env,
358359
};
359360

360-
let explorer = RDataExplorer::new(name.clone(), obj, Some(binding))
361-
.map_err(harp::Error::Anyhow)?;
361+
let explorer =
362+
RDataExplorer::new(name.clone(), obj, Some(binding), DataExplorerMode::Full)
363+
.map_err(harp::Error::Anyhow)?;
362364
let viewer_id = Console::get_mut()
363365
.comm_open_backend(DATA_EXPLORER_COMM_NAME, Box::new(explorer))
364366
.map_err(harp::Error::Anyhow)?;

crates/ark/tests/data_explorer.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ use ark::comm_handler::CommHandlerContext;
7272
use ark::comm_handler::EnvironmentChanged;
7373
use ark::data_explorer::format::format_column;
7474
use ark::data_explorer::format::format_string;
75+
use ark::data_explorer::r_data_explorer::DataExplorerMode;
7576
use ark::data_explorer::r_data_explorer::DataObjectEnvInfo;
7677
use ark::data_explorer::r_data_explorer::RDataExplorer;
7778
use ark::r_task::r_task;
@@ -106,7 +107,7 @@ fn open_data_explorer(dataset: String) -> TestSetup {
106107

107108
let inner = r_task(|| unsafe {
108109
let data = RObject::new(Rf_eval(r_symbol!(&dataset), R_GlobalEnv));
109-
let handler = RDataExplorer::new(dataset, data, None).unwrap();
110+
let handler = RDataExplorer::new(dataset, data, None, DataExplorerMode::Full).unwrap();
110111
TestInner(handler, ctx)
111112
});
112113

@@ -131,7 +132,8 @@ fn open_data_explorer_from_expression(expr: &str, bind: Option<&str>) -> anyhow:
131132
name: name.to_string(),
132133
env: RObject::view(R_ENVS.global),
133134
});
134-
let handler = RDataExplorer::new(String::from("obj"), object, binding)?;
135+
let handler =
136+
RDataExplorer::new(String::from("obj"), object, binding, DataExplorerMode::Full)?;
135137
Ok(TestInner(handler, ctx))
136138
})?;
137139

0 commit comments

Comments
 (0)