From 9d490f5ddf001d0de697492c6aa736bb33aa5870 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Fri, 13 Mar 2026 14:09:04 -0600 Subject: [PATCH 01/25] Disabled some benchmarks and scaled --- .github/resources/adhoc-scale-benchmark.properties | 2 +- .../tests/standard/aggby/BasicMathComboTest.java | 9 +++++---- .../benchmark/tests/standard/join/NaturalJoinTest.java | 2 +- .../benchmark/tests/standard/sort/SortComboTest.java | 2 +- .../tests/standard/updateby/RollingComboTest.java | 4 ++-- .../benchmark/tests/standard/where/WhereTest.java | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/resources/adhoc-scale-benchmark.properties b/.github/resources/adhoc-scale-benchmark.properties index adab86ad..31a2fd15 100644 --- a/.github/resources/adhoc-scale-benchmark.properties +++ b/.github/resources/adhoc-scale-benchmark.properties @@ -15,7 +15,7 @@ schema.registry.addr=redpanda:8081 kafka.consumer.addr=redpanda:29092 # Default timeout to complete processes (Executing queries, generating records) -default.completion.timeout=10 minutes +default.completion.timeout=20 minutes # Default data distribution for column data (random, ascending, descending, runlength) default.data.distribution=${baseDistrib} diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java index 2614954e..0214e280 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java @@ -12,7 +12,7 @@ public class BasicMathComboTest { @BeforeEach void setup() { - runner.setRowFactor(3); + runner.setRowFactor(2); runner.tables("source"); var setupStr = """ @@ -28,19 +28,19 @@ void setup() { @Test void mathComboAggBy7Ops0Groups() { - runner.setScaleFactors(20, 9); + runner.setScaleFactors(40, 15); var q = "source.agg_by(aggs)"; runner.test("MathCombo-AggBy- 7 Ops No Groups", 1, q, "num1", "num2"); } - @Test + @Test @Disabled void mathComboAggBy7Ops1Group() { runner.setScaleFactors(9, 4); var q = "source.agg_by(aggs, by=['key1'])"; runner.test("MathCombo-AggBy- 7 Ops 1 Group 100 Unique Vals ", 100, q, "key1", "num1", "num2"); } - @Test + @Test @Disabled void mathComboAggBy7Ops2Groups() { runner.setScaleFactors(2, 1); var q = "source.agg_by(aggs, by=['key1', 'key2'])"; @@ -49,6 +49,7 @@ void mathComboAggBy7Ops2Groups() { @Test void mathComboAggBy7Ops3Groups() { + runner.setScaleFactors(2, 1); var q = "source.agg_by(aggs, by=['key1', 'key2', 'key3'])"; runner.test("MathCombo-AggBy- 7 Ops 3 Groups 100K Unique Combos ", 90900, q, "key1", "key2", "key3", "num1", "num2"); diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/join/NaturalJoinTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/join/NaturalJoinTest.java index fc8406be..7c7dd2df 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/join/NaturalJoinTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/join/NaturalJoinTest.java @@ -23,7 +23,7 @@ void NaturalJoinOn1Col() { runner.test("NaturalJoin- Join On 1 Col", q, "key5", "num1"); } - @Test + @Test @Disabled void NaturalJoinOn2Cols() { setup(6); var q = "source.natural_join(right, on=['key1 = r_wild', 'key2 = r_key2'])"; diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/sort/SortComboTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/sort/SortComboTest.java index f89b2eed..b444508f 100644 --- 
a/src/it/java/io/deephaven/benchmark/tests/standard/sort/SortComboTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/sort/SortComboTest.java @@ -23,7 +23,7 @@ void sort2ColsAscendDescend() { runner.test("Sort- Both Directions 2 Cols", q, "key1", "key2", "num1"); } - @Test + @Test @Disabled void sort3ColsAscendDescend() { var q = """ source.sort(order_by=['key1', 'key2', 'key3'], order=[SortDirection.ASCENDING, SortDirection.DESCENDING, diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/updateby/RollingComboTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/updateby/RollingComboTest.java index 1516abb5..7d40f933 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/updateby/RollingComboTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/updateby/RollingComboTest.java @@ -61,7 +61,7 @@ void rollingCombo0Groups6Ops() { runner.test("RollingCombo- No Groups 12 Cols", q, "num1", "num2", "timestamp"); } - @Test + @Test @Disabled void rollingCombo1Groups6Ops() { setup.factors(2, 1, 1); runner.addSetupQuery(group100); @@ -71,7 +71,7 @@ void rollingCombo1Groups6Ops() { runner.test("RollingCombo- 1 Groups 100 Unique Vals", q, "key1", "num1", "num2", "timestamp"); } - @Test + @Test @Disabled void rollingCombo2Groups6Ops() { setup.factors(1, 2, 1); runner.addSetupQuery(group10K); diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/where/WhereTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/where/WhereTest.java index 02dd605a..4a90e96f 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/where/WhereTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/where/WhereTest.java @@ -26,7 +26,7 @@ void where1Filter() { runner.test("Where- 1 Filter", q, "key1", "num1"); } - @Test + @Test @Disabled void where2Filters() { runner.setScaleFactors(310, 300); var q = """ From 47f066f8a75c012faa9d95f06c8f8ca962c40c41 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Fri, 13 Mar 2026 17:58:57 -0600 Subject: [PATCH 02/25] Scaled up basic math combo --- .../benchmark/tests/standard/aggby/BasicMathComboTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java index 0214e280..b9176a31 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java @@ -28,7 +28,7 @@ void setup() { @Test void mathComboAggBy7Ops0Groups() { - runner.setScaleFactors(40, 15); + runner.setScaleFactors(50, 25); var q = "source.agg_by(aggs)"; runner.test("MathCombo-AggBy- 7 Ops No Groups", 1, q, "num1", "num2"); } From 15cf1f4d9c29b68bad4682005cd0c4407aef5bee Mon Sep 17 00:00:00 2001 From: stanbrub Date: Fri, 20 Mar 2026 17:49:29 -0600 Subject: [PATCH 03/25] Added a Local Parquet Generator as opposed to going through Kafka --- pom.xml | 5 + .../tests/standard/StandardTestRunner.java | 6 +- .../io/deephaven/benchmark/api/Bench.java | 12 ++ .../deephaven/benchmark/api/BenchTable.java | 76 +++++++- .../generator/AvroKafkaGenerator.java | 10 +- .../generator/LocalParquetGenerator.java | 179 ++++++++++++++++++ .../io/deephaven/benchmark/util/Filer.java | 21 ++ 7 files changed, 299 insertions(+), 10 deletions(-) create mode 100644 src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java diff --git a/pom.xml b/pom.xml index 304b6632..490b2691 100644 
--- a/pom.xml +++ b/pom.xml @@ -276,6 +276,11 @@ deephaven-java-client-barrage-dagger 41.3 + + blue.strategic.parquet + parquet-floor + 1.64 + io.deephaven deephaven-log-to-slf4j diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index 3ca12176..09530e72 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -206,7 +206,7 @@ long getMaxExpectedRowCount(long expectedRowCount, long scaleFactor) { } String getReadOperation(int scaleFactor, long rowCount, String... loadColumns) { - var headRows = (rowCount >= getGeneratedRowCount())?"":".head(${rows})"; + var headRows = (rowCount >= getGeneratedRowCount()) ? "" : ".head(${rows})"; if (scaleFactor > 1 && mainTable.equals("timed") && Arrays.asList(loadColumns).contains("timestamp")) { var read = """ merge([ @@ -215,7 +215,7 @@ String getReadOperation(int scaleFactor, long rowCount, String... loadColumns) { 'timestamp=timestamp.plusMillis((long)(ii / ${rows}) * ${rows})' ]).select() """; - read = read.replace("${headRows}",headRows); + read = read.replace("${headRows}", headRows); return read.replace("${scaleFactor}", "" + scaleFactor).replace("${rows}", "" + rowCount); } @@ -226,7 +226,7 @@ String getReadOperation(int scaleFactor, long rowCount, String... loadColumns) { read = "merge([${readTable}] * ${scaleFactor})".replace("${readTable}", read); read = read.replace("${scaleFactor}", "" + scaleFactor); } - return read.replace("${headRows}",headRows).replace("${rows}", "" + rowCount); + return read.replace("${headRows}", headRows).replace("${rows}", "" + rowCount); } String getStaticQuery(String name, String operation, long rowCount, String... 
loadColumns) { diff --git a/src/main/java/io/deephaven/benchmark/api/Bench.java b/src/main/java/io/deephaven/benchmark/api/Bench.java index c4330dd9..182f6bb1 100644 --- a/src/main/java/io/deephaven/benchmark/api/Bench.java +++ b/src/main/java/io/deephaven/benchmark/api/Bench.java @@ -27,6 +27,11 @@ final public class Bench { * The root benchmark result directory */ static final public Path rootOutputDir = Paths.get("results"); + /** + * The root benchmark result directory + */ + static final public Path tmpDir = createTmpDir(); + /** * The name of the benchmark results csv file */ @@ -292,5 +297,12 @@ static private void setSystemProperties() { System.setProperty("timestamp.test.results", "false"); } } + + static private Path createTmpDir() { + var f = Filer.createFile(System.getProperty("java.io.tmpdir"), "test.delete.me"); + Filer.putFileText(f, "test write and delete"); + Filer.delete(f); + return f.getParent(); + } } diff --git a/src/main/java/io/deephaven/benchmark/api/BenchTable.java b/src/main/java/io/deephaven/benchmark/api/BenchTable.java index b00c00e3..93fdd5a1 100644 --- a/src/main/java/io/deephaven/benchmark/api/BenchTable.java +++ b/src/main/java/io/deephaven/benchmark/api/BenchTable.java @@ -197,14 +197,55 @@ public boolean generateParquet() { if (rowPauseMillis < 0) withRowPause(0, ChronoUnit.MILLIS); - bench.awaitCompletion(generateWithAvro()); - Log.info("Produce Data Duration: " + timer.duration().toMillis()); + var m = bench.awaitCompletion(generateWithAvro()); + Log.info("Produce Send Rate: %.2f recs/sec", m.getValue("send.rate")); + Log.info("Produce Data Duration: %d secs", timer.duration().toMillis()); timer = Timer.start(); q = replaceTableAndGeneratorFields(kafkaToParquetQuery); bench.query(q).execute(); - Log.info("DH Write Table Duration: " + timer.duration().toMillis()); + Log.info("DH Write Table Duration: %d secs", timer.duration().toMillis()); + return true; + } + + /** + * Generate the table synchronously to a parquet file in the engine's data directory. If a parquet file already + * exists in the Deephaven data directory that matches this table definition, use it and skip generation. + * Note: This is the same as generateParquet() except it generates the parquet file directly to the + * engine's data directory without going through kafka. As such, it will not work when the test runner and the + * engine are not co-located. 
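+     * <p>
+     * A minimal usage sketch, given a {@code Bench} instance {@code api} (the table name and column values below are
+     * illustrative only):
+     * 
+     * <pre>
+     * api.table("prices")
+     *     .add("sym", "string", "[1-250]", "random")
+     *     .add("price", "double", "[0-100]", "random")
+     *     .withRowCount(1000000)
+     *     .generateLocalParquet();
+     * </pre>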
+ * + * @return true if file was generated, otherwise false + */ + public boolean generateLocalParquet() { + columns.setDefaultDistribution(getDefaultDistro()); + String q = replaceTableAndGeneratorFields(useExistingParquetQuery); + + AtomicBoolean usedExistingParquet = new AtomicBoolean(false); + bench.query(q).fetchAfter("used_existing_parquet_" + tableName, table -> { + usedExistingParquet.set(table.getValue(0, "UsedExistingParquet").toString().equalsIgnoreCase("true")); + }).execute(); + + if (usedExistingParquet.get()) { + Log.info("Using existing table '%s' with %s rows", tableName, getRowCount()); + return false; + } + Log.info("Generating table '%s' with %s rows", tableName, getRowCount()); + var timer = Timer.start(); + + if (rowPauseMillis < 0) + withRowPause(0, ChronoUnit.MILLIS); + + var m = bench.awaitCompletion(generateWithLocalParquet()); + Log.info("Produce Send Rate: %.2f recs/sec", m.getValue("send.rate")); + Log.info("Produce Data Duration: %d secs", timer.duration().toMillis()); + timer = Timer.start(); + + q = replaceTableAndGeneratorFields(localToParquetQuery); + bench.query(q).execute(); + + Log.info("DH Write Table Duration: %d secs", timer.duration().toMillis()); return true; } @@ -236,6 +277,12 @@ private Future generateWithProtobuf() { generator = new ProtobufKafkaGenerator(bootstrapServer, schemaRegistry, tableName, columns, getCompression()); return generator.produce(getRowPause(), getRowCount(), getRunDuration()); } + + private Future generateWithLocalParquet() { + String parquetFile = Bench.tmpDir.resolve("local.tmp.parquet").toAbsolutePath().toString(); + generator = new LocalParquetGenerator(parquetFile, tableName, columns, getCompression()); + return generator.produce(getRowPause(), getRowCount(), getRunDuration()); + } private int getRowPause() { if (rowPauseMillis >= 0) @@ -302,6 +349,7 @@ private String replaceTableAndGeneratorFields(String query) { String compression = String.format(", compression_codec_name='%s'", codec); return query.replace("${table.name}", tableName) + .replace("${local.tmp.dir}", Bench.tmpDir.toAbsolutePath().toString()) .replace("${compression.codec}", compression) .replace("${max.dict.keys}", ", max_dictionary_keys=2000000") .replace("${max.dict.bytes}", ", max_dictionary_size=16777216") @@ -317,6 +365,7 @@ private String replaceTableAndGeneratorFields(String query) { static final String generatorDefValues = """ # Define files and generator configuration + local_tmp_parquet = '${local.tmp.dir}/local.tmp.parquet' table_parquet = '/data/${table.name}.parquet' table_gen_parquet = '/data/${table.definition.id}.gen.parquet' table_gen_def_text = '''${table.definition}''' @@ -386,5 +435,26 @@ with open(table_gen_def_file, 'w') as f: from deephaven import garbage_collect garbage_collect() """; + + static final String localToParquetQuery = """ + # Create a Parquet file from a Kafka topic + import jpy, os + + if os.path.exists(table_parquet): + os.remove(table_parquet) + + with open(table_gen_def_file, 'w') as f: + f.write(table_gen_def_text) + + import shutil + shutil.move(local_tmp_parquet, ${table.gen.parquet}) + + os.link(table_gen_parquet, table_parquet) + + del ${table.name} + + from deephaven import garbage_collect + garbage_collect() + """; } diff --git a/src/main/java/io/deephaven/benchmark/generator/AvroKafkaGenerator.java b/src/main/java/io/deephaven/benchmark/generator/AvroKafkaGenerator.java index db662a88..e2d741e4 100644 --- a/src/main/java/io/deephaven/benchmark/generator/AvroKafkaGenerator.java +++ 
b/src/main/java/io/deephaven/benchmark/generator/AvroKafkaGenerator.java
@@ -128,11 +128,13 @@ private Producer createProducer(String bootstrapServer, S
         props.put(KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
         props.put(VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
         props.put("schema.registry.url", schemaRegistryUrl);
-        props.put(ACKS_CONFIG, "0");
+        props.put(ENABLE_IDEMPOTENCE_CONFIG, "true");
+        props.put(MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "1");
+        props.put(ACKS_CONFIG, "all");
         props.put(COMPRESSION_TYPE_CONFIG, getCompression(compression));
-        props.put(BATCH_SIZE_CONFIG, 16384);
-        props.put(BUFFER_MEMORY_CONFIG, 16384 * 4);
-        props.put(LINGER_MS_CONFIG, 50);
+        props.put(BATCH_SIZE_CONFIG, "512000");
+        props.put(BUFFER_MEMORY_CONFIG, "67108864");
+        props.put(LINGER_MS_CONFIG, 10);
 
         return new KafkaProducer<>(props);
     }
diff --git a/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java
new file mode 100644
index 00000000..7ff0009b
--- /dev/null
+++ b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java
@@ -0,0 +1,179 @@
+/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */
+package io.deephaven.benchmark.generator;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+import io.deephaven.benchmark.metric.Metrics;
+import io.deephaven.benchmark.util.Log;
+import io.deephaven.benchmark.util.Threads;
+import blue.strategic.parquet.*;
+
+/**
+ * Generator that produces rows to a local Parquet file according to the provided column definitions.
+ * consumer.
+ */
+public class LocalParquetGenerator implements Generator {
+    final private ExecutorService queue = Threads.single("ProtobufKafkaGenerator");
+    final private Path parquetOut;
+    final private ParquetWriter writer;
+    final private ColumnDefs columnDefs;
+    final private String topic;
+    final private MessageType schema;
+    final private AtomicBoolean isClosed = new AtomicBoolean(false);
+
+    /**
+     * Initialize with kafka server and schema registry locations, kafka topic, column definitions, and compression
+     * scheme
+     *
+     * @param bootstrapServers the kafka external location (ex. localhost:9092)
+     * @param schemaRegistryUrl the ReST schema registry location (ex. localhost:8081)
+     * @param topic the kafka topic to produce record to (ex. mytable)
+     * @param columnDefs the column definitions specifying what the data looks like
+     * @param compression one of Kafka's ProducerConfig.COMPRESSION_TYPE_CONFIG schemes
+     */
+    public LocalParquetGenerator(String parquetFile, String topic, ColumnDefs columnDefs, String compression) {
+        this.topic = topic;
+        this.columnDefs = columnDefs;
+        this.schema = MessageTypeParser.parseMessageType(getSchemaMessage(topic, columnDefs));
+        this.parquetOut = Paths.get(parquetFile);
+        this.writer = createParquetWriter(schema, parquetOut);
+    }
+
+    /**
+     * Produce a maximum number of records to a local Parquet file asynchronously.
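+     * <p>
+     * For example, {@code produce(0, 10_000_000, 3600)} (illustrative values) writes up to ten million rows with no
+     * per-record pause, giving up if generation runs longer than an hour.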
+     *
+     * @param perRecordPauseMillis wait time between each record sent
+     * @param maxRecordCount maximum records to produce
+     * @param maxDurationSecs maximum duration to produce (May prevent maximum records from being produced)
+     */
+    public Future<Metrics> produce(int perRecordPauseMillis, long maxRecordCount, int maxDurationSecs) {
+        checkClosed();
+        var r = new Callable<Metrics>() {
+            @Override
+            public Metrics call() {
+                final long maxDuration = maxDurationSecs * 1000;
+                final long beginTime = System.currentTimeMillis();
+                long recCount = 0;
+                long duration = 0;
+                boolean isDone = false;
+                var rec = new Row(schema, new ArrayList<>(columnDefs.getCount()));
+                while (!isClosed.get() && !isDone) {
+                    try {
+                        if (recCount >= maxRecordCount) {
+                            isDone = true;
+                            continue;
+                        }
+                        // Build a record with the column defs for Parquet row write
+                        for (int i = 0, n = columnDefs.getCount(); i < n; i++) {
+                            var v = columnDefs.nextValue(i, recCount, maxRecordCount);
+                            rec.addValue(v);
+                        }
+                        // Write the record to Parquet file
+                        writer.write(rec);
+
+                        if (++recCount % 10_000_000 == 0)
+                            Log.info("Produced %s records to topic '%s'", recCount, topic);
+                        duration = System.currentTimeMillis() - beginTime;
+                        if (duration > maxDuration)
+                            isDone = true;
+                    } catch (Exception ex) {
+                        throw new RuntimeException("Failed to send to topic: " + topic, ex);
+                    }
+                }
+                Log.info("Produced %s records to topic: %s", recCount, topic);
+                var metrics = new Metrics("test-runner", "generate." + topic).set("duration.secs", duration / 1000.0)
+                        .set("record.count", recCount).set("send.rate", recCount / (duration / 1000.0));
+                return metrics;
+            }
+        };
+        return queue.submit(r);
+    }
+
+    /**
+     * Close the producer and shutdown any async threads created during production
+     */
+    public void close() {
+        if (isClosed.get())
+            return;
+        isClosed.set(true);
+        queue.shutdown();
+        try {
+            writer.close();
+        } catch (Exception ex) {
+            throw new RuntimeException("Failed to close Parquet writer for topic: " + topic, ex);
+        }
+    }
+
+    private void checkClosed() {
+        if (isClosed.get())
+            throw new RuntimeException("Generator is closed");
+    }
+
+    private ParquetWriter<Row> createParquetWriter(MessageType schema, Path parquetOut) {
+        try {
+            Dehydrator<Row> dehydrator = (row, valueWriter) -> {
+                row.write(valueWriter);
+            };
+            return ParquetWriter.writeFile(schema, parquetOut.toFile(), dehydrator);
+        } catch (Exception ex) {
+            throw new RuntimeException("Failed to create Parquet writer for topic: " + topic, ex);
+        }
+    }
+
+    private String getSchemaMessage(String topic, ColumnDefs fieldDefs) {
+        var schema = """
+                message ${topic} {
+                    ${fields}
+                }
+                """;
+        var fields = "";
+        for (Map.Entry e : fieldDefs.toTypeMap().entrySet()) {
+            var name = e.getKey();
+            var type = e.getValue();
+            fields += String.format("required %s %s %s\n", getFieldType(type), name, getCharEncoding(type));
+        }
+        schema = schema.replace("${topic}", topic);
+        return schema.replace("${fields}", fields);
+    }
+
+    private String getFieldType(String type) {
+        return switch (type) {
+            case "long" -> "int64";
+            case "int" -> "int32";
+            case "double" -> "double";
+            case "float" -> "float";
+            case "string" -> "string";
+            case "timestamp-millis" -> "google.protobuf.Timestamp";
+            default -> throw new RuntimeException("Unsupported generator data type: " + type);
+        };
+    }
+
+    private String getCharEncoding(String type) {
+        return switch (type) {
+            case "string" -> "UTF8";
+            default -> "";
+        };
+    }
+
+    record Row(MessageType schema, List<Object> values) {
+        public void addValue(Object value) {
+            values.add(value);
+        }
+
+        public
void write(ValueWriter valueWriter) { + for (int i = 0, n = values.size(); i < n; i++) { + valueWriter.write(schema.getFieldName(i), values.get(i)); + } + } + + public void clear() { + values.clear(); + } + } + +} diff --git a/src/main/java/io/deephaven/benchmark/util/Filer.java b/src/main/java/io/deephaven/benchmark/util/Filer.java index f14a85a9..b8d5bb4d 100644 --- a/src/main/java/io/deephaven/benchmark/util/Filer.java +++ b/src/main/java/io/deephaven/benchmark/util/Filer.java @@ -6,6 +6,8 @@ import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermissions; import java.util.Comparator; import java.util.stream.Collectors; @@ -34,6 +36,25 @@ static public void delete(Path path) { } } + /** + * Create a file with the given name in the given parent directory. Create the parent directory if it does not + * exist. Permissions are 755 for directories and 644 for files. + * + * @param parentDir the parent directory to contain the file + * @param fileName the name of the file to create + * @return the path of the created file + */ + static public Path createFile(String parentDir, String fileName) { + try { + var d = Files.createDirectories(Paths.get(parentDir), PosixFilePermissions.asFileAttribute( + PosixFilePermissions.fromString("rwxr-xr-x"))); + return Files.createFile(d.resolve(fileName), + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--"))); + } catch (Exception ex) { + throw new RuntimeException("Failed to create temp file: " + fileName, ex); + } + } + /** * Read the text of a file while preserving newlines and getting rid of carriage returns * From 86041116d663f8f2e8923cc9c4fbcd16adbc140a Mon Sep 17 00:00:00 2001 From: stanbrub Date: Mon, 23 Mar 2026 20:19:29 -0600 Subject: [PATCH 04/25] Added local parquet generator and 1st training test --- .../tests/standard/StandardTestRunner.java | 51 ++++++++++++++----- .../benchmark/tests/train/AggByTrainTest.java | 40 +++++++++++++++ .../tests/train/TrainTestRunner.java | 40 +++++++++++++++ .../io/deephaven/benchmark/api/Bench.java | 6 +-- .../deephaven/benchmark/api/BenchTable.java | 39 +++++++------- .../generator/LocalParquetGenerator.java | 19 ++++--- 6 files changed, 152 insertions(+), 43 deletions(-) create mode 100644 src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java create mode 100644 src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index 09530e72..58e0c72f 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2022-2024 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.tests.standard; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -32,6 +32,8 @@ final public class StandardTestRunner { private int staticFactor = 1; private int incFactor = 1; private int rowCountFactor = 1; + private boolean useMemorySource = true; + private boolean useLocalParquet = false; public StandardTestRunner(Object testInst) { this.testInst = testInst; @@ -96,6 +98,25 @@ public void setServices(String... 
services) {
         requiredServices.addAll(Arrays.asList(services));
     }
 
+    /**
+     * Set if the generated tables are loaded into memory before running the test queries.
+     * 
+     * @param useMemorySource true to load the generated tables into memory before the test, otherwise false
+     */
+    public void useMemorySource(boolean useMemorySource) {
+        this.useMemorySource = useMemorySource;
+    }
+
+    /**
+     * Set if the generated tables are created through Deephaven (i.e. real client-server) or through the local file
+     * system (i.e. a local copy). The default of "false" is preferred.
+     * 
+     * @param useLocalParquet true to generate tables through the local file system, false to generate through Deephaven
+     */
+    public void useLocalParquet(boolean useLocalParquet) {
+        this.useLocalParquet = useLocalParquet;
+    }
+
     /**
      * Add a query to be run directly after the main table is loaded. It is not measured. This query can transform the
      * main table or supporting table, set up aggregations or updateby operations, etc.
@@ -207,26 +228,28 @@ long getMaxExpectedRowCount(long expectedRowCount, long scaleFactor) {
     }
 
     String getReadOperation(int scaleFactor, long rowCount, String... loadColumns) {
         var headRows = (rowCount >= getGeneratedRowCount()) ? "" : ".head(${rows})";
+        var selectStr = useMemorySource ? "select" : "view";
         if (scaleFactor > 1 && mainTable.equals("timed") && Arrays.asList(loadColumns).contains("timestamp")) {
             var read = """
                     merge([
                         read('/data/timed.parquet').view(formulas=[${loadColumns}])${headRows}
                     ] * ${scaleFactor}).update_view([
                         'timestamp=timestamp.plusMillis((long)(ii / ${rows}) * ${rows})'
-                    ]).select()
+                    ]).${selectStr}()
                     """;
-            read = read.replace("${headRows}", headRows);
+            read = read.replace("${headRows}", headRows).replace("${selectStr}", selectStr);
             return read.replace("${scaleFactor}", "" + scaleFactor).replace("${rows}", "" + rowCount);
         }
 
-        var read = "read('/data/${mainTable}.parquet')${headRows}.select(formulas=[${loadColumns}])";
+        var read = "read('/data/${mainTable}.parquet')${headRows}.${selectStr}(formulas=[${loadColumns}])";
         read = (loadColumns.length == 0) ? ("empty_table(${rows})") : read;
 
         if (scaleFactor > 1) {
             read = "merge([${readTable}] * ${scaleFactor})".replace("${readTable}", read);
             read = read.replace("${scaleFactor}", "" + scaleFactor);
         }
-        return read.replace("${headRows}", headRows).replace("${rows}", "" + rowCount);
+        read = read.replace("${headRows}", headRows).replace("${rows}", "" + rowCount);
+        return read.replace("${selectStr}", selectStr);
     }
 
     String getStaticQuery(String name, String operation, long rowCount, String... loadColumns) {
@@ -435,7 +458,7 @@ boolean generateNamedTable(String name, String distribution, String[] groups) {
     }
 
     boolean generateSourceTable(String distribution, String[] groups) {
-        return api.table("source")
+        var t = api.table("source")
             .add("num1", "double", "[0-4]", distribution)
             .add("num2", "double", "[1-10]", distribution)
             .add("key1", "string", "[1-100]", distribution)
@@ -444,8 +467,8 @@ boolean generateSourceTable(String distribution, String[] groups) {
             .add("key4", "int", "[0-98]", distribution)
             .add("key5", "string", "[1-1000000]", distribution)
             .withRowCount(getGeneratedRowCount())
-            .withColumnGrouping(groups)
-            .generateParquet();
+            .withColumnGrouping(groups);
+        return useLocalParquet ?
t.generateLocalParquet() : t.generateParquet(); } boolean generateRightTable(String distribution, String[] groups) { @@ -455,21 +478,21 @@ boolean generateRightTable(String distribution, String[] groups) { distribution = "ascending"; } supportTables.add("right"); - return api.table("right") + var t = api.table("right") .add("r_key1", "string", "[1-100]", distribution) .add("r_key2", "string", "[1-101]", distribution) .add("r_wild", "string", "[1-10000]", distribution) .add("r_key4", "int", "[0-98]", distribution) .add("r_key5", "string", "[1-1010000]", distribution) .withRowCount(1010000) - .withColumnGrouping(groups) - .generateParquet(); + .withColumnGrouping(groups); + return useLocalParquet ? t.generateLocalParquet() : t.generateParquet(); } boolean generateTimedTable(String distribution, String[] groups) { long minTime = 1676557157537L; long maxTime = minTime + getGeneratedRowCount() - 1; - return api.table("timed") + var t = api.table("timed") .add("timestamp", "timestamp-millis", "[" + minTime + "-" + maxTime + "]", "ascending") .add("num1", "double", "[0-4]", distribution) .add("num2", "double", "[1-10]", distribution) @@ -478,8 +501,8 @@ boolean generateTimedTable(String distribution, String[] groups) { .add("key3", "int", "[0-8]", distribution) .add("key4", "int", "[0-98]", distribution) .withFixedRowCount(true) - .withColumnGrouping(groups) - .generateParquet(); + .withColumnGrouping(groups); + return useLocalParquet ? t.generateLocalParquet() : t.generateParquet(); } record Result(long loadedRowCount, Duration elapsedTime, long resultRowCount) { diff --git a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java new file mode 100644 index 00000000..e57f9393 --- /dev/null +++ b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java @@ -0,0 +1,40 @@ +/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */ +package io.deephaven.benchmark.tests.train; + +import org.junit.jupiter.api.*; + +/** + * Standard tests for the aggBy table operation. 
Applies basic math aggregations to table data + */ +public class AggByTrainTest { + final TrainTestRunner runner = new TrainTestRunner(this); + + @BeforeEach + void setup() { + runner.setRowFactor(2); + runner.tables("source"); + + var setupStr = """ + from deephaven import agg + + aggs = [ + agg.sum_('Sum=num1'), agg.std('Std=num2'), agg.min_('Min=num1'), agg.max_('Max=num2'), + agg.avg('Avg=num1'), agg.var('Var=num2'), agg.count_('num1') + ] + """; + runner.addSetupQuery(setupStr); + } + + @Test + void mathComboAggBy7Ops0Groups() { + var q = "source.agg_by(aggs)"; + runner.test("MathCombo-AggBy- 7 Ops No Groups", 1, q, "num1", "num2"); + } + + @Test + void mathComboAggBy7Ops2Groups() { + var q = "source.agg_by(aggs, by=['key1', 'key2'])"; + runner.test("MathCombo-AggBy- 7 Ops 2 Groups 10K Unique Combos ", 10100, q, "key1", "key2", "num1", "num2"); + } + +} diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java new file mode 100644 index 00000000..9acba188 --- /dev/null +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -0,0 +1,40 @@ +/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */ +package io.deephaven.benchmark.tests.train; + +import io.deephaven.benchmark.tests.standard.StandardTestRunner; + +/** + * A wrapper for the Bench api that allows the running of operational training (think AOT) tests without requiring the + * boilerplate logic like imports, parquet reads, time measurement logic, etc. Each test runs two + * benchmarks; one reading from a static parquet, and the other exercising ticking tables through the + * AutotuningIncrementalReleaseFilter. This is different from the StandardTestRunner in that + * it runs more than one operation per benchmark and attempts to cover the majority of the query code base with fewer + * benchmarks. It is meant for training AOT and for "representative" benchmarks used to compare things like JDK/Python + * versions and GC types. + */ +final public class TrainTestRunner { + final StandardTestRunner delegate; + + TrainTestRunner(Object testInst) { + this.delegate = new StandardTestRunner(testInst); + delegate.useMemorySource(false); + delegate.useLocalParquet(true); + } + + public void setRowFactor(int i) { + delegate.setRowFactor(i); + } + + public void tables(String... names) { + delegate.tables(names); + } + + public void addSetupQuery(String query) { + delegate.addSetupQuery(query); + } + + public void test(String name, long maxExpectedRowCount, String operation, String... 
loadColumns) { + delegate.test(name, maxExpectedRowCount, operation, loadColumns); + } + +} \ No newline at end of file diff --git a/src/main/java/io/deephaven/benchmark/api/Bench.java b/src/main/java/io/deephaven/benchmark/api/Bench.java index 182f6bb1..c5570512 100644 --- a/src/main/java/io/deephaven/benchmark/api/Bench.java +++ b/src/main/java/io/deephaven/benchmark/api/Bench.java @@ -31,7 +31,7 @@ final public class Bench { * The root benchmark result directory */ static final public Path tmpDir = createTmpDir(); - + /** * The name of the benchmark results csv file */ @@ -297,9 +297,9 @@ static private void setSystemProperties() { System.setProperty("timestamp.test.results", "false"); } } - + static private Path createTmpDir() { - var f = Filer.createFile(System.getProperty("java.io.tmpdir"), "test.delete.me"); + var f = Filer.createFile(System.getProperty("java.io.tmpdir") + "/bench", "test.delete.me"); Filer.putFileText(f, "test write and delete"); Filer.delete(f); return f.getParent(); diff --git a/src/main/java/io/deephaven/benchmark/api/BenchTable.java b/src/main/java/io/deephaven/benchmark/api/BenchTable.java index 93fdd5a1..88ebf3e3 100644 --- a/src/main/java/io/deephaven/benchmark/api/BenchTable.java +++ b/src/main/java/io/deephaven/benchmark/api/BenchTable.java @@ -6,6 +6,7 @@ import java.time.temporal.ChronoUnit; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import io.deephaven.benchmark.generator.*; import io.deephaven.benchmark.metric.Metrics; import io.deephaven.benchmark.util.Ids; @@ -220,11 +221,15 @@ public boolean generateParquet() { */ public boolean generateLocalParquet() { columns.setDefaultDistribution(getDefaultDistro()); - String q = replaceTableAndGeneratorFields(useExistingParquetQuery); + var q = replaceTableAndGeneratorFields(useExistingParquetQuery); - AtomicBoolean usedExistingParquet = new AtomicBoolean(false); + var usedExistingParquet = new AtomicBoolean(false); + var tableGenParquet = new AtomicReference(""); + var hostDataDir = new AtomicReference(""); bench.query(q).fetchAfter("used_existing_parquet_" + tableName, table -> { usedExistingParquet.set(table.getValue(0, "UsedExistingParquet").toString().equalsIgnoreCase("true")); + tableGenParquet.set(table.getValue(0, "TableGenParquet").toString()); + hostDataDir.set(table.getValue(0, "HostDataDir").toString()); }).execute(); if (usedExistingParquet.get()) { @@ -237,13 +242,12 @@ public boolean generateLocalParquet() { if (rowPauseMillis < 0) withRowPause(0, ChronoUnit.MILLIS); - var m = bench.awaitCompletion(generateWithLocalParquet()); + var m = bench.awaitCompletion(generateWithLocalParquet(hostDataDir.get(), tableGenParquet.get())); Log.info("Produce Send Rate: %.2f recs/sec", m.getValue("send.rate")); Log.info("Produce Data Duration: %d secs", timer.duration().toMillis()); timer = Timer.start(); - q = replaceTableAndGeneratorFields(localToParquetQuery); - bench.query(q).execute(); + bench.query(localToParquetQuery).execute(); Log.info("DH Write Table Duration: %d secs", timer.duration().toMillis()); return true; @@ -277,9 +281,11 @@ private Future generateWithProtobuf() { generator = new ProtobufKafkaGenerator(bootstrapServer, schemaRegistry, tableName, columns, getCompression()); return generator.produce(getRowPause(), getRowCount(), getRunDuration()); } - - private Future generateWithLocalParquet() { - String parquetFile = Bench.tmpDir.resolve("local.tmp.parquet").toAbsolutePath().toString(); + + private 
Future generateWithLocalParquet(String hostDataDir, String tableGenParquet) { + if (hostDataDir.isEmpty()) + throw new RuntimeException("HOST_DATA_DIR env must be set to use local parquet generation"); + String parquetFile = hostDataDir + "/" + tableGenParquet.replaceAll("^/data/", ""); generator = new LocalParquetGenerator(parquetFile, tableName, columns, getCompression()); return generator.produce(getRowPause(), getRowCount(), getRunDuration()); } @@ -349,7 +355,6 @@ private String replaceTableAndGeneratorFields(String query) { String compression = String.format(", compression_codec_name='%s'", codec); return query.replace("${table.name}", tableName) - .replace("${local.tmp.dir}", Bench.tmpDir.toAbsolutePath().toString()) .replace("${compression.codec}", compression) .replace("${max.dict.keys}", ", max_dictionary_keys=2000000") .replace("${max.dict.bytes}", ", max_dictionary_size=16777216") @@ -365,7 +370,6 @@ private String replaceTableAndGeneratorFields(String query) { static final String generatorDefValues = """ # Define files and generator configuration - local_tmp_parquet = '${local.tmp.dir}/local.tmp.parquet' table_parquet = '/data/${table.name}.parquet' table_gen_parquet = '/data/${table.definition.id}.gen.parquet' table_gen_def_text = '''${table.definition}''' @@ -395,7 +399,11 @@ with open(path) as f: os.link(str(matching_gen_parquet) + '.gen.parquet', table_parquet) usedExisting = True - used_existing_parquet_${table.name} = new_table([string_col("UsedExistingParquet", [str(usedExisting)])]) + used_existing_parquet_${table.name} = new_table([ + string_col("UsedExistingParquet", [str(usedExisting)]), + string_col("TableGenParquet", [table_gen_parquet]), + string_col("HostDataDir", [os.getenv("HOST_DATA_DIR","")]) + ]) """; static final String kafkaToParquetQuery = """ @@ -435,9 +443,9 @@ with open(table_gen_def_file, 'w') as f: from deephaven import garbage_collect garbage_collect() """; - + static final String localToParquetQuery = """ - # Create a Parquet file from a Kafka topic + # Link an already created parquet file import jpy, os if os.path.exists(table_parquet): @@ -446,13 +454,8 @@ with open(table_gen_def_file, 'w') as f: with open(table_gen_def_file, 'w') as f: f.write(table_gen_def_text) - import shutil - shutil.move(local_tmp_parquet, ${table.gen.parquet}) - os.link(table_gen_parquet, table_parquet) - del ${table.name} - from deephaven import garbage_collect garbage_collect() """; diff --git a/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java index 7ff0009b..884e5d33 100644 --- a/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java +++ b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.generator; import java.nio.file.Path; @@ -14,11 +14,13 @@ import blue.strategic.parquet.*; /** - * Generator that produces rows to a local Parquet file according to the provided column definitions. - * consumer. + * Generator that produces rows to a local Parquet file according to the provided column definitions. Note: This + * generator MUST generate the same row and column data in the same order and types as the non-local + * AvroKafkaGenerator when the two generators have the same column definitions. 
(The "same data" is defined + * by how it looks in Deephaven tables, not byte-for-byte in the files.) */ public class LocalParquetGenerator implements Generator { - final private ExecutorService queue = Threads.single("ProtobufKafkaGenerator"); + final private ExecutorService queue = Threads.single("LocalParquetGenerator"); final private Path parquetOut; final private ParquetWriter writer; final private ColumnDefs columnDefs; @@ -75,6 +77,7 @@ public Metrics call() { } // Write the record to Parquet file writer.write(rec); + rec.clear(); if (++recCount % 10_000_000 == 0) Log.info("Produced %s records to topic '%s'", recCount, topic); @@ -82,7 +85,7 @@ public Metrics call() { if (duration > maxDuration) isDone = true; } catch (Exception ex) { - throw new RuntimeException("Failed to send to topic: " + topic, ex); + throw new RuntimeException("Failed to write to topic: " + topic, ex); } } Log.info("Produced %s records to topic: %s", recCount, topic); @@ -135,7 +138,7 @@ private String getSchemaMessage(String topic, ColumnDefs fieldDefs) { for (Map.Entry e : fieldDefs.toTypeMap().entrySet()) { var name = e.getKey(); var type = e.getValue(); - fields += String.format("required %s %s %s\n", getFieldType(type), name, getCharEncoding(type)); + fields += String.format("required %s %s %s;\n", getFieldType(type), name, getCharEncoding(type)); } schema = schema.replace("${topic}", topic); return schema.replace("${fields}", fields); @@ -147,7 +150,7 @@ private String getFieldType(String type) { case "int" -> "int32"; case "double" -> "double"; case "float" -> "float"; - case "string" -> "string"; + case "string" -> "binary"; case "timestamp-millis" -> "google.protobuf.Timestamp"; default -> throw new RuntimeException("Unsupported generator data type: " + type); }; @@ -155,7 +158,7 @@ private String getFieldType(String type) { private String getCharEncoding(String type) { return switch (type) { - case "string" -> "UTF8"; + case "string" -> "(UTF8)"; default -> ""; }; } From 83b1c11167cc6b4e9ffe95f5f2ab0ea35c5189d8 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Wed, 25 Mar 2026 13:53:41 -0600 Subject: [PATCH 05/25] Added more train benchmarks. 
Improved Local Parquet Generator --- pom.xml | 3 +- .../tests/standard/StandardTestRunner.java | 14 ++--- .../benchmark/tests/train/AggByTrainTest.java | 23 ++++---- .../tests/train/FilterTrainTest.java | 45 ++++++++++++++++ .../tests/train/FormulaTrainTest.java | 50 +++++++++++++++++ .../tests/train/NaturalJoinTrainTest.java | 33 ++++++++++++ .../tests/train/OrderedTrainTest.java | 43 +++++++++++++++ .../tests/train/TrainTestRunner.java | 17 +++--- .../tests/train/UpdateByTrainTest.java | 53 +++++++++++++++++++ .../io/deephaven/benchmark/api/Bench.java | 11 ---- .../deephaven/benchmark/api/BenchTable.java | 14 +++-- .../benchmark/generator/ColumnDefs.java | 2 +- .../generator/LocalParquetGenerator.java | 23 ++++---- 13 files changed, 275 insertions(+), 56 deletions(-) create mode 100644 src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java create mode 100644 src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java create mode 100644 src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java create mode 100644 src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java create mode 100644 src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java diff --git a/pom.xml b/pom.xml index 490b2691..c0c7c2dd 100644 --- a/pom.xml +++ b/pom.xml @@ -178,7 +178,7 @@ ${project.basedir}/eclipse-java-google-style.xml - /* Copyright (c) 2022-$YEAR Deephaven Data Labs and Patent Pending */ + /* Copyright (c) $YEAR Deephaven Data Labs and Patent Pending */ @@ -276,6 +276,7 @@ deephaven-java-client-barrage-dagger 41.3 + blue.strategic.parquet parquet-floor diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index 58e0c72f..a36c5647 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -214,12 +214,12 @@ public void test(String name, long maxExpectedRowCount, String operation, String } } - long getWarmupRowCount() { - return (long) (api.propertyAsIntegral("warmup.row.count", "0") * rowCountFactor); + public long getGeneratedRowCount() { + return (long) (api.propertyAsIntegral("scale.row.count", "100000") * rowCountFactor); } - long getGeneratedRowCount() { - return (long) (api.propertyAsIntegral("scale.row.count", "100000") * rowCountFactor); + long getWarmupRowCount() { + return (long) (api.propertyAsIntegral("warmup.row.count", "0") * rowCountFactor); } long getMaxExpectedRowCount(long expectedRowCount, long scaleFactor) { @@ -478,15 +478,15 @@ boolean generateRightTable(String distribution, String[] groups) { distribution = "ascending"; } supportTables.add("right"); - var t = api.table("right") + return api.table("right") .add("r_key1", "string", "[1-100]", distribution) .add("r_key2", "string", "[1-101]", distribution) .add("r_wild", "string", "[1-10000]", distribution) .add("r_key4", "int", "[0-98]", distribution) .add("r_key5", "string", "[1-1010000]", distribution) .withRowCount(1010000) - .withColumnGrouping(groups); - return useLocalParquet ? 
t.generateLocalParquet() : t.generateParquet();
+            .withColumnGrouping(groups)
+            .generateParquet();
     }
 
     boolean generateTimedTable(String distribution, String[] groups) {
diff --git a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java
index e57f9393..be031ebf 100644
--- a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java
+++ b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java
@@ -4,15 +4,14 @@
 import org.junit.jupiter.api.*;
 
 /**
- * Standard tests for the aggBy table operation. Applies basic math aggregations to table data
+ * Training tests for the aggBy table operations that do aggregations (e.g. sum, std, min/max, var, avg). See
+ * TrainTestRunner for more information.
  */
 public class AggByTrainTest {
     final TrainTestRunner runner = new TrainTestRunner(this);
 
-    @BeforeEach
-    void setup() {
-        runner.setRowFactor(2);
-        runner.tables("source");
+    void setup(double rowFactor) {
+        runner.tables(rowFactor, "timed");
 
         var setupStr = """
         from deephaven import agg
 
@@ -26,15 +25,17 @@ void setup() {
     }
 
     @Test
-    void mathComboAggBy7Ops0Groups() {
-        var q = "source.agg_by(aggs)";
-        runner.test("MathCombo-AggBy- 7 Ops No Groups", 1, q, "num1", "num2");
+    void aggBy0Groups() {
+        setup(40);
+        var q = "timed.agg_by(aggs)";
+        runner.test("AggBy- No Groups", 1, q, "num1", "num2");
     }
 
     @Test
-    void mathComboAggBy7Ops2Groups() {
-        var q = "source.agg_by(aggs, by=['key1', 'key2'])";
-        runner.test("MathCombo-AggBy- 7 Ops 2 Groups 10K Unique Combos ", 10100, q, "key1", "key2", "num1", "num2");
+    void aggBy2Groups() {
+        setup(20);
+        var q = "timed.agg_by(aggs, by=['key1', 'key2'])";
+        runner.test("AggBy- 2 Groups 10K Unique Combos ", 10100, q, "key1", "key2", "num1", "num2");
     }
 
 }
diff --git a/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java
new file mode 100644
index 00000000..96636b25
--- /dev/null
+++ b/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java
@@ -0,0 +1,45 @@
+/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */
+package io.deephaven.benchmark.tests.train;
+
+import org.junit.jupiter.api.*;
+import io.deephaven.benchmark.tests.standard.StandardTestRunner;
+
+/**
+ * Training tests for the where and whereIn table operations. Filters rows of data from the source table where the
+ * rows match column values in the filter table. See TrainTestRunner for more information.
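+ * For example, the first test below runs {@code timed.where_in(where_filter, cols=['key1 = set1'])}, which keeps
+ * only the rows of {@code timed} whose {@code key1} value appears in the {@code set1} column of the filter table.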
+ *
+ */
+@Tag("Iterate")
+public class FilterTrainTest {
+    final TrainTestRunner runner = new TrainTestRunner(this);
+
+    void setup(double rowFactor) {
+        runner.tables(rowFactor, "timed");
+        var setup = """
+        from deephaven.column import string_col, int_col
+        where_filter = new_table([
+            string_col("set1", ['1', '2', '3', '4', '5', '6', '7', '8']),
+            string_col("set2", ['10', '20', '30', '40', '50', '60', '70', '80']),
+            int_col("set3", [-1, -2, -3, -4, 1, 2, 3, 4])
+        ])
+        """;
+        runner.addSetupQuery(setup);
+    }
+
+    @Test
+    void filter1Col() {
+        setup(40);
+        var q = "timed.where_in(where_filter, cols=['key1 = set1']).where(['key1 < `4`'])";
+        runner.test("Filter- 1 Col", 0, q, "key1", "num1");
+    }
+
+    @Test
+    void filter3Cols() {
+        setup(40);
+        var q = """
+        timed.where_in(where_filter, cols=['key1 = set1', 'key2 = set2', 'key3 = set3']) \
+        .where(filters=["key1 = '1'", "key2 < '100'", "key3 in -2, -1, 0, 1, 2"])
+        """;
+        runner.test("Filter- 3 Cols", 0, q, "key1", "key2", "key3", "num1");
+    }
+
+}
diff --git a/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java
new file mode 100644
index 00000000..00b263c6
--- /dev/null
+++ b/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java
@@ -0,0 +1,50 @@
+/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */
+package io.deephaven.benchmark.tests.train;
+
+import org.junit.jupiter.api.*;
+
+/**
+ * Training tests for the formula table operations (e.g. udf, inline). See TrainTestRunner for more
+ * information.
+ */
+public class FormulaTrainTest {
+    final TrainTestRunner runner = new TrainTestRunner(this);
+
+    void setup(double rowFactor) {
+        runner.tables(rowFactor, "timed");
+    }
+
+    @Test
+    void formulaUdf() {
+        setup(5);
+        var setup = """
+        def f_py(num1: float, num2: float) -> float:
+            return (num2 + num1) / 2
+        def f_np(num1: np.float64, num2: np.float64) -> np.float64:
+            return num1 + num2
+        """;
+        runner.addSetupQuery(setup);
+        var q = "timed.view(['New1 = f_py(num1, num2)','New2 = f_np(num1, num2)']).sum_by()";
+        runner.test("Formula- UDF 2 Calcs", 1, q, "num1", "num2");
+    }
+
+    @Test
+    void formulaInline() {
+        setup(40);
+        var q = "timed.view(['New1 = (float)((num2 + num1) / 2)', 'New2 = (float)(num1 + num2)']).sum_by()";
+        runner.test("Formula- Inline 2 Calcs", 1, q, "num1", "num2");
+    }
+
+    @Test
+    void formulaDate() {
+        setup(1.75);
+        var q = """
+        timed.view([
+            'New1 = parseDuration(`PT4H52M14S`).toHours()',
+            'New2 = parseInstant(`2023-05-31T04:52:14.001 ET`).getEpochSecond()'
+        ]).sum_by()
+        """;
+        runner.test("Formula- Inline 2 Dates", 1, q, "num1", "num2");
+    }
+
+}
diff --git a/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java
new file mode 100644
index 00000000..b8d12107
--- /dev/null
+++ b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java
@@ -0,0 +1,33 @@
+/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */
+package io.deephaven.benchmark.tests.train;
+
+import org.junit.jupiter.api.*;
+
+/**
+ * Training tests for the table operations that do joins (e.g. natural join). See
+ * TrainTestRunner for more information.
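+ * For example, {@code timed.natural_join(right, on=['key1 = r_wild'])} appends the right table's non-key columns
+ * to each row of {@code timed}, matching at most one row of {@code right} and filling nulls where none matches.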
+ */
+public class NaturalJoinTrainTest {
+    final TrainTestRunner runner = new TrainTestRunner(this);
+
+    void setup(double rowFactor) {
+        runner.tables(rowFactor, "timed", "right");
+    }
+
+    @Test
+    void naturalJoinOn1Col() {
+        setup(40);
+        var r = "right = right.select_distinct(['r_wild'])";
+        runner.addSetupQuery(r);
+        var q = "timed.natural_join(right, on=['key1 = r_wild'])";
+        runner.test("NaturalJoin- Join On 1 Col", 0, q, "key1", "num1");
+    }
+
+    @Test
+    void naturalJoinOn3Cols() {
+        setup(20);
+        var q = "timed.natural_join(right, on=['key1 = r_wild', 'key2 = r_key2', 'key1 = r_key1'])";
+        runner.test("NaturalJoin- Join On 3 Cols", 0, q, "key1", "key2", "num1");
+    }
+
+}
diff --git a/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java
new file mode 100644
index 00000000..7e27bb40
--- /dev/null
+++ b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java
@@ -0,0 +1,43 @@
+/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */
+package io.deephaven.benchmark.tests.train;
+
+import org.junit.jupiter.api.*;
+import io.deephaven.benchmark.tests.standard.StandardTestRunner;
+
+/**
+ * Training tests for the aggBy table operations that do ordering (e.g. median, percentile, sorted_first/last). See
+ * TrainTestRunner for more information.
+ *
+ */
+public class OrderedTrainTest {
+    final TrainTestRunner runner = new TrainTestRunner(this);
+
+    void setup(double rowFactor) {
+        runner.tables(rowFactor, "timed");
+
+        var setupStr = """
+        from deephaven import agg
+        aggs = [
+            agg.median('Median=num1'), agg.pct(0.50, ['Percentile=num1']),
+            agg.unique('Unique=num2'), agg.sorted_first('key4', ['num2']),
+            agg.sorted_last('key3', ['num1'])
+        ]
+        """;
+        runner.addSetupQuery(setupStr);
+    }
+
+    @Test
+    void ordered0Groups() {
+        setup(21);
+        var q = "timed.agg_by(aggs)";
+        runner.test("Ordered- No Groups", 100, q, "key3", "key4", "num1", "num2");
+    }
+
+    @Test
+    void ordered2Groups() {
+        setup(5);
+        var q = "timed.agg_by(aggs, by=['key1', 'key2'])";
+        runner.test("Ordered- 2 Groups 10K Unique Combos", 10100, q, "key1", "key2", "key3", "key4", "num1", "num2");
+    }
+
+}
diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java
index 9acba188..38b3a4cf 100644
--- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java
+++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java
@@ -1,6 +1,7 @@
 /* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */
 package io.deephaven.benchmark.tests.train;
 
+import java.util.Arrays;
 import io.deephaven.benchmark.tests.standard.StandardTestRunner;
 
 /**
@@ -13,20 +14,24 @@
  * versions and GC types.
  */
 final public class TrainTestRunner {
+    static final int maxRowFactor = 40;
     final StandardTestRunner delegate;
+    final long baseRowCount;
 
     TrainTestRunner(Object testInst) {
         this.delegate = new StandardTestRunner(testInst);
+        this.baseRowCount = delegate.getGeneratedRowCount();
         delegate.useMemorySource(false);
         delegate.useLocalParquet(true);
+        delegate.setRowFactor(maxRowFactor);
     }
 
-    public void setRowFactor(int i) {
-        delegate.setRowFactor(i);
-    }
-
-    public void tables(String... names) {
+    public void tables(double rowFactor, String...
names) {
         delegate.tables(names);
+        if (rowFactor > maxRowFactor)
+            throw new IllegalArgumentException("Row factor cannot be greater than " + maxRowFactor);
+        var q = "%s = %s.head(%d)".formatted(names[0], names[0], (long) (baseRowCount * rowFactor));
+        delegate.addSetupQuery(q);
     }
 
     public void addSetupQuery(String query) {
@@ -37,4 +42,4 @@
     public void test(String name, long maxExpectedRowCount, String operation, String... loadColumns) {
         delegate.test(name, maxExpectedRowCount, operation, loadColumns);
     }
 
-}
\ No newline at end of file
+}
diff --git a/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java
new file mode 100644
index 00000000..4af63515
--- /dev/null
+++ b/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java
@@ -0,0 +1,53 @@
+/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */
+package io.deephaven.benchmark.tests.train;
+
+import org.junit.jupiter.api.*;
+
+/**
+ * Training tests for the updateBy table operation. Combines a mixture of rolling operations and cumulative operations
+ */
+public class UpdateByTrainTest {
+    final TrainTestRunner runner = new TrainTestRunner(this);
+    final String noGroups = """
+        avg_contains = rolling_avg_time(ts_col='timestamp',cols=['A=num1','B=num2'],rev_time='PT5S',fwd_time='PT5S')
+        max_before = rolling_max_tick(cols=['C=num1','D=num2'], rev_ticks=3000,fwd_ticks=-1000)
+        prod_after = rolling_prod_time(ts_col='timestamp',cols=['E=num1','F=num2'],rev_time='-PT1S',fwd_time='PT4S')
+        """;
+    final String group10K = """
+        avg_contains = rolling_avg_time(ts_col='timestamp',cols=['A=num1','B=num2'],rev_time='PT4M',fwd_time='PT5M')
+        max_before = rolling_max_tick(cols=['C=num1','D=num2'], rev_ticks=30,fwd_ticks=-10)
+        prod_after = rolling_prod_time(ts_col='timestamp',cols=['E=num1','F=num2'],rev_time='-PT1M',fwd_time='PT4M')
+        """;
+
+    void setup(double rowFactor) {
+        runner.tables(rowFactor, "timed");
+        var setup = """
+        from deephaven.updateby import rolling_avg_time, rolling_max_tick, rolling_prod_time
+        from deephaven.updateby import ema_tick, cum_min, cum_sum
+
+        ema_tick_op = ema_tick(decay_ticks=10000,cols=['G=num1','H=num2'])
+        min_op = cum_min(cols=['I=num1','J=num2'])
+        sum_op = cum_sum(cols=['K=num1','L=num2'])
+        """;
+        runner.addSetupQuery(setup);
+    }
+
+    @Test
+    void mixedComboNoGroups() {
+        setup(10);
+        runner.addSetupQuery(noGroups);
+        var q = "timed.update_by(ops=[avg_contains, max_before, prod_after, ema_tick_op, min_op, sum_op])";
+        runner.test("UpdateBy- No Groups 12 Cols", 0, q, "num1", "num2", "timestamp");
+    }
+
+    @Test
+    void rollingCombo2Groups() {
+        setup(3);
+        runner.addSetupQuery(group10K);
+        var q = """
+        timed.update_by(ops=[avg_contains,max_before,prod_after,ema_tick_op,min_op,sum_op], by=['key1','key2'])
+        """;
+        runner.test("UpdateBy- 2 Groups 10K Unique Combos", 0, q, "key1", "key2", "num1", "num2", "timestamp");
+    }
+
+}
diff --git a/src/main/java/io/deephaven/benchmark/api/Bench.java b/src/main/java/io/deephaven/benchmark/api/Bench.java
index c5570512..f6225086 100644
--- a/src/main/java/io/deephaven/benchmark/api/Bench.java
+++ b/src/main/java/io/deephaven/benchmark/api/Bench.java
@@ -27,10 +27,6 @@ final public class Bench {
      * The root benchmark result directory
      */
     static final public Path rootOutputDir = Paths.get("results");
-    /**
-     * The root benchmark result directory
-     */
-    static final public Path tmpDir = createTmpDir();
 
     /**
      * The name of the benchmark results csv file
      */
@@ -298,11 +294,4 @@ static private
void setSystemProperties() { } } - static private Path createTmpDir() { - var f = Filer.createFile(System.getProperty("java.io.tmpdir") + "/bench", "test.delete.me"); - Filer.putFileText(f, "test write and delete"); - Filer.delete(f); - return f.getParent(); - } - } diff --git a/src/main/java/io/deephaven/benchmark/api/BenchTable.java b/src/main/java/io/deephaven/benchmark/api/BenchTable.java index 88ebf3e3..9a090d1b 100644 --- a/src/main/java/io/deephaven/benchmark/api/BenchTable.java +++ b/src/main/java/io/deephaven/benchmark/api/BenchTable.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2022-2024 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.api; import java.io.Closeable; @@ -200,19 +200,20 @@ public boolean generateParquet() { var m = bench.awaitCompletion(generateWithAvro()); Log.info("Produce Send Rate: %.2f recs/sec", m.getValue("send.rate")); - Log.info("Produce Data Duration: %d secs", timer.duration().toMillis()); + Log.info("Produce Data Duration: %d secs", timer.duration().toSeconds()); timer = Timer.start(); q = replaceTableAndGeneratorFields(kafkaToParquetQuery); bench.query(q).execute(); - Log.info("DH Write Table Duration: %d secs", timer.duration().toMillis()); + Log.info("DH Write Table Duration: %d secs", timer.duration().toSeconds()); return true; } /** * Generate the table synchronously to a parquet file in the engine's data directory. If a parquet file already - * exists in the Deephaven data directory that matches this table definition, use it and skip generation. + * exists in the Deephaven data directory that matches this table definition, use it and skip generation. + *
* <p>
* Note: This is the same as generateParquet() except it generates the parquet file directly to the * engine's data directory without going through kafka. As such, it will not work when the test runner and the * engine are not co-located. @@ -244,12 +245,9 @@ public boolean generateLocalParquet() { var m = bench.awaitCompletion(generateWithLocalParquet(hostDataDir.get(), tableGenParquet.get())); Log.info("Produce Send Rate: %.2f recs/sec", m.getValue("send.rate")); - Log.info("Produce Data Duration: %d secs", timer.duration().toMillis()); - timer = Timer.start(); + Log.info("Produce Data Duration: %d secs", timer.duration().toSeconds()); bench.query(localToParquetQuery).execute(); - - Log.info("DH Write Table Duration: %d secs", timer.duration().toMillis()); return true; } diff --git a/src/main/java/io/deephaven/benchmark/generator/ColumnDefs.java b/src/main/java/io/deephaven/benchmark/generator/ColumnDefs.java index a2211eb4..70433815 100644 --- a/src/main/java/io/deephaven/benchmark/generator/ColumnDefs.java +++ b/src/main/java/io/deephaven/benchmark/generator/ColumnDefs.java @@ -12,7 +12,7 @@ * Note: All possible data values are loaded up front to prevent object-creation during production. This can take a * considerable amount of memory for larger scales, especially for generated strings. */ -public class ColumnDefs { +final public class ColumnDefs { final int valueCacheSize; final List columns = new ArrayList<>(); private String defaultDistribution = "random"; diff --git a/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java index 884e5d33..d57b70d2 100644 --- a/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java +++ b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.generator; import java.nio.file.Path; @@ -29,14 +29,13 @@ public class LocalParquetGenerator implements Generator { final private AtomicBoolean isClosed = new AtomicBoolean(false); /** - * Initialize with kafka server and schema registry locations, kafka topic, column definitions, and compression - * scheme + * Create a local Parquet generator with the provided column definitions and output file. The column definitions + * determine the schema of the Parquet file and the data generated for each column. * - * @param bootstrapServers the kafka external location (ex. localhost:9092) - * @param schemaRegistryUrl the ReST schema registry location (ex. localhost:8081) - * @param topic the kafka topic to produce record to (ex. mytable) - * @param columnDefs the column definitions specifying what the data looks like - * @param compression one of Kafka's ProducerConfig.COMPRESSION_TYPE_CONFIG schemes + * @param parquetFile output Parquet file path + * @param topic topic name (used for logging and schema generation) + * @param columnDefs column definitions that determine the schema and generated data + * @param compression compression type for Parquet file (e.g. 
"SNAPPY", "GZIP", "UNCOMPRESSED") */ public LocalParquetGenerator(String parquetFile, String topic, ColumnDefs columnDefs, String compression) { this.topic = topic; @@ -60,10 +59,11 @@ public Future produce(int perRecordPauseMillis, long maxRecordCount, in public Metrics call() { final long maxDuration = maxDurationSecs * 1000; final long beginTime = System.currentTimeMillis(); + final int columnDefsCount = columnDefs.getCount(); + final var rec = new Row(schema, new ArrayList<>(columnDefs.getCount())); long recCount = 0; long duration = 0; boolean isDone = false; - var rec = new Row(schema, new ArrayList<>(columnDefs.getCount())); while (!isClosed.get() && !isDone) { try { if (recCount >= maxRecordCount) { @@ -71,7 +71,7 @@ public Metrics call() { continue; } // Build a record with the column defs for Parquet row write - for (int i = 0, n = columnDefs.getCount(); i < n; i++) { + for (int i = 0, n = columnDefsCount; i < n; i++) { var v = columnDefs.nextValue(i, recCount, maxRecordCount); rec.addValue(v); } @@ -151,7 +151,7 @@ private String getFieldType(String type) { case "double" -> "double"; case "float" -> "float"; case "string" -> "binary"; - case "timestamp-millis" -> "google.protobuf.Timestamp"; + case "timestamp-millis" -> "int64"; default -> throw new RuntimeException("Unsupported generator data type: " + type); }; } @@ -159,6 +159,7 @@ private String getFieldType(String type) { private String getCharEncoding(String type) { return switch (type) { case "string" -> "(UTF8)"; + case "timestamp-millis" -> "(TIMESTAMP(MILLIS,true))"; default -> ""; }; } From c552c01ae7190154dca661883f88d62dd5e4c10d Mon Sep 17 00:00:00 2001 From: stanbrub Date: Thu, 26 Mar 2026 15:32:16 -0600 Subject: [PATCH 06/25] Revert BasicMathCombo --- .../tests/standard/aggby/BasicMathComboTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java index b9176a31..a3cbc5da 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java @@ -12,7 +12,7 @@ public class BasicMathComboTest { @BeforeEach void setup() { - runner.setRowFactor(2); + runner.setRowFactor(3); runner.tables("source"); var setupStr = """ @@ -28,19 +28,19 @@ void setup() { @Test void mathComboAggBy7Ops0Groups() { - runner.setScaleFactors(50, 25); + runner.setScaleFactors(20, 9); var q = "source.agg_by(aggs)"; runner.test("MathCombo-AggBy- 7 Ops No Groups", 1, q, "num1", "num2"); } - @Test @Disabled + @Test void mathComboAggBy7Ops1Group() { runner.setScaleFactors(9, 4); var q = "source.agg_by(aggs, by=['key1'])"; runner.test("MathCombo-AggBy- 7 Ops 1 Group 100 Unique Vals ", 100, q, "key1", "num1", "num2"); } - @Test @Disabled + @Test void mathComboAggBy7Ops2Groups() { runner.setScaleFactors(2, 1); var q = "source.agg_by(aggs, by=['key1', 'key2'])"; From 62aa96ae19bf0da17d654df964847885b6ea1910 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Thu, 26 Mar 2026 15:33:15 -0600 Subject: [PATCH 07/25] Revert BasicMathCombo --- .../benchmark/tests/standard/aggby/BasicMathComboTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java index a3cbc5da..2614954e 100644 --- 
a/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/aggby/BasicMathComboTest.java @@ -49,7 +49,6 @@ void mathComboAggBy7Ops2Groups() { @Test void mathComboAggBy7Ops3Groups() { - runner.setScaleFactors(2, 1); var q = "source.agg_by(aggs, by=['key1', 'key2', 'key3'])"; runner.test("MathCombo-AggBy- 7 Ops 3 Groups 100K Unique Combos ", 90900, q, "key1", "key2", "key3", "num1", "num2"); From f78ca2287f878626cb2725794228b94da00eea8c Mon Sep 17 00:00:00 2001 From: stanbrub Date: Thu, 26 Mar 2026 15:38:16 -0600 Subject: [PATCH 08/25] Reverted scale and disabled for pre-train standard tests used for previous GC pass --- .../benchmark/tests/standard/join/NaturalJoinTest.java | 2 +- .../benchmark/tests/standard/sort/SortComboTest.java | 2 +- .../benchmark/tests/standard/updateby/RollingComboTest.java | 4 ++-- .../deephaven/benchmark/tests/standard/where/WhereTest.java | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/join/NaturalJoinTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/join/NaturalJoinTest.java index 7c7dd2df..fc8406be 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/join/NaturalJoinTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/join/NaturalJoinTest.java @@ -23,7 +23,7 @@ void NaturalJoinOn1Col() { runner.test("NaturalJoin- Join On 1 Col", q, "key5", "num1"); } - @Test @Disabled + @Test void NaturalJoinOn2Cols() { setup(6); var q = "source.natural_join(right, on=['key1 = r_wild', 'key2 = r_key2'])"; diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/sort/SortComboTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/sort/SortComboTest.java index b444508f..f89b2eed 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/sort/SortComboTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/sort/SortComboTest.java @@ -23,7 +23,7 @@ void sort2ColsAscendDescend() { runner.test("Sort- Both Directions 2 Cols", q, "key1", "key2", "num1"); } - @Test @Disabled + @Test void sort3ColsAscendDescend() { var q = """ source.sort(order_by=['key1', 'key2', 'key3'], order=[SortDirection.ASCENDING, SortDirection.DESCENDING, diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/updateby/RollingComboTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/updateby/RollingComboTest.java index 7d40f933..1516abb5 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/updateby/RollingComboTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/updateby/RollingComboTest.java @@ -61,7 +61,7 @@ void rollingCombo0Groups6Ops() { runner.test("RollingCombo- No Groups 12 Cols", q, "num1", "num2", "timestamp"); } - @Test @Disabled + @Test void rollingCombo1Groups6Ops() { setup.factors(2, 1, 1); runner.addSetupQuery(group100); @@ -71,7 +71,7 @@ void rollingCombo1Groups6Ops() { runner.test("RollingCombo- 1 Groups 100 Unique Vals", q, "key1", "num1", "num2", "timestamp"); } - @Test @Disabled + @Test void rollingCombo2Groups6Ops() { setup.factors(1, 2, 1); runner.addSetupQuery(group10K); diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/where/WhereTest.java b/src/it/java/io/deephaven/benchmark/tests/standard/where/WhereTest.java index 4a90e96f..02dd605a 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/where/WhereTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/where/WhereTest.java @@ -26,7 +26,7 @@ void where1Filter() 
{ runner.test("Where- 1 Filter", q, "key1", "num1"); } - @Test @Disabled + @Test void where2Filters() { runner.setScaleFactors(310, 300); var q = """ From e5412e738ee5f01f49f26552005173b5793b3203 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Mon, 30 Mar 2026 23:09:12 -0600 Subject: [PATCH 09/25] Parallelized local parquet. worked around directory link failures --- pom.xml | 11 +- .../tests/standard/StandardTestRunner.java | 6 +- .../benchmark/tests/train/AggByTrainTest.java | 4 +- .../tests/train/FilterTrainTest.java | 9 +- .../tests/train/FormulaTrainTest.java | 6 +- .../tests/train/NaturalJoinTrainTest.java | 2 +- .../tests/train/OrderedTrainTest.java | 1 - .../tests/train/TrainTestRunner.java | 2 +- .../tests/train/UpdateByTrainTest.java | 4 +- .../deephaven/benchmark/api/BenchTable.java | 79 +++++++---- .../io/deephaven/benchmark/api/Snippets.java | 43 ++++++ .../benchmark/generator/ColumnDefs.java | 21 ++- .../generator/LocalParquetGenerator.java | 133 ++++++++---------- .../io/deephaven/benchmark/util/Filer.java | 17 ++- .../deephaven/benchmark/util/FilerTest.java | 9 +- .../benchmark/util/filerfolder/filer1.txt | 1 + .../util/filerfolder/folder1/filer2.txt | 1 + 17 files changed, 214 insertions(+), 135 deletions(-) create mode 100644 src/test/resources/io/deephaven/benchmark/util/filerfolder/filer1.txt create mode 100644 src/test/resources/io/deephaven/benchmark/util/filerfolder/folder1/filer2.txt diff --git a/pom.xml b/pom.xml index c0c7c2dd..70a4bcfa 100644 --- a/pom.xml +++ b/pom.xml @@ -271,17 +271,16 @@ kafka-protobuf-serializer 8.1.1 - - io.deephaven - deephaven-java-client-barrage-dagger - 41.3 - - blue.strategic.parquet parquet-floor 1.64 + + io.deephaven + deephaven-java-client-barrage-dagger + 41.3 + io.deephaven deephaven-log-to-slf4j diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index a36c5647..065482f1 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -232,7 +232,7 @@ String getReadOperation(int scaleFactor, long rowCount, String... loadColumns) { if (scaleFactor > 1 && mainTable.equals("timed") && Arrays.asList(loadColumns).contains("timestamp")) { var read = """ merge([ - read('/data/timed.parquet').view(formulas=[${loadColumns}])${headRows} + bench_api_read('/data/timed.parquet').view(formulas=[${loadColumns}])${headRows} ] * ${scaleFactor}).update_view([ 'timestamp=timestamp.plusMillis((long)(ii / ${rows}) * ${rows})' ]).${selectStr}() @@ -241,7 +241,7 @@ String getReadOperation(int scaleFactor, long rowCount, String... loadColumns) { return read.replace("${scaleFactor}", "" + scaleFactor).replace("${rows}", "" + rowCount); } - var read = "read('/data/${mainTable}.parquet')${headRows}.${selectStr}(formulas=[${loadColumns}])"; + var read = "bench_api_read('/data/${mainTable}.parquet')${headRows}.${selectStr}(formulas=[${loadColumns}])"; read = (loadColumns.length == 0) ? ("empty_table(${rows})") : read; if (scaleFactor > 1) { @@ -379,7 +379,7 @@ String listStr(String... 
values) { } String loadSupportTables() { - return supportTables.stream().map(t -> t + " = read('/data/" + t + ".parquet').select()\n") + return supportTables.stream().map(t -> t + " = bench_api_read('/data/" + t + ".parquet').select()\n") .collect(Collectors.joining("")); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java index be031ebf..1095058d 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java @@ -26,14 +26,14 @@ void setup(double rowFactor) { @Test void aggBy0Groups() { - setup(40); + setup(120); var q = "timed.agg_by(aggs)"; runner.test("AggBy- No Groups", 1, q, "num1", "num2"); } @Test void aggBy2Groups() { - setup(20); + setup(21); var q = "timed.agg_by(aggs, by=['key1', 'key2'])"; runner.test("AggBy- 2 Groups 10K Unique Combos ", 10100, q, "key1", "key2", "num1", "num2"); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java index 96636b25..a3396ee4 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java @@ -1,8 +1,7 @@ -/* Copyright (c) 2022-2024 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.tests.train; import org.junit.jupiter.api.*; -import io.deephaven.benchmark.tests.standard.StandardTestRunner; /** * Standard tests for the whereIn table operation. Filters rows of data from the source table where the rows match @@ -27,14 +26,14 @@ void setup(double rowFactor) { @Test void filter1Col() { - setup(40); + setup(390); var q = "timed.where_in(where_filter, cols=['key1 = set1']).where(['key1 < `4`'])"; runner.test("Filter- 1 Col", 0, q, "key1", "num1"); } - + @Test void filter3Cols() { - setup(40); + setup(390); var q = """ timed.where_in(where_filter, cols=['key1 = set1', 'key2 = set2', 'key3 = set3']) \ .where(filters=["key1 = '1'", "key2 < '100'", "key3 in -2, -1, 0, 1, 2"]) diff --git a/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java index 00b263c6..6b0f11cb 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java @@ -16,7 +16,7 @@ void setup(double rowFactor) { @Test void formulaUdf() { - setup(5); + setup(4.8); var setup = """ def f_py(num1: float, num2: float) -> float: return (num2 + num1) / 2 @@ -30,14 +30,14 @@ def f_np(num1: np.float64, num2: np.float64) -> np.float64: @Test void formulaInline() { - setup(40); + setup(220); var q = "timed.view(['New1 = (float)((num2 + num1) / 2)', 'New2 = (float)(num1 + num2)']).sum_by()"; runner.test("Formula- Inline 2 Calcs", 1, q, "num1", "num2"); } @Test void formulaDate() { - setup(1.75); + setup(1.8); var q = """ timed.view([ 'New1 = parseDuration(`PT4H52M14S`).toHours()', diff --git a/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java index b8d12107..34286996 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java @@ -16,7 +16,7 @@ void setup(double 
rowFactor) { @Test void naturalJoinOn1Col() { - setup(40); + setup(60); var r = "right = right.select_distinct(['r_wild'])"; runner.addSetupQuery(r); var q = "timed.natural_join(right, on=['key1 = r_wild'])"; diff --git a/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java index 7e27bb40..0a6978fe 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java @@ -2,7 +2,6 @@ package io.deephaven.benchmark.tests.train; import org.junit.jupiter.api.*; -import io.deephaven.benchmark.tests.standard.StandardTestRunner; /** * Training tests for the aggBy table operations that do ordering (e.g.. median, percentile, sorted_first/last). See diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index 38b3a4cf..593640c3 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -14,7 +14,7 @@ * versions and GC types. */ final public class TrainTestRunner { - static final int maxRowFactor = 40; + static final int maxRowFactor = 500; final StandardTestRunner delegate; final long baseRowCount; diff --git a/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java index 4af63515..9ed28657 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java @@ -34,7 +34,7 @@ void setup(double rowFactor) { @Test void mixedComboNoGroups() { - setup(10); + setup(9.8); runner.addSetupQuery(noGroups); var q = "timed.update_by(ops=[avg_contains, max_before, prod_after, ema_tick_op, min_op, sum_op])"; runner.test("UpdateBy- No Groups 12 Cols", 0, q, "num1", "num2", "timestamp"); @@ -42,7 +42,7 @@ void mixedComboNoGroups() { @Test void rollingCombo2Groups() { - setup(3); + setup(2.8); runner.addSetupQuery(group10K); var q = """ timed.update_by(ops=[avg_contains,max_before,prod_after,ema_tick_op,min_op,sum_op], by=['key1','key2']) diff --git a/src/main/java/io/deephaven/benchmark/api/BenchTable.java b/src/main/java/io/deephaven/benchmark/api/BenchTable.java index 9a090d1b..11ccd10e 100644 --- a/src/main/java/io/deephaven/benchmark/api/BenchTable.java +++ b/src/main/java/io/deephaven/benchmark/api/BenchTable.java @@ -4,15 +4,14 @@ import java.io.Closeable; import java.time.Duration; import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import io.deephaven.benchmark.generator.*; import io.deephaven.benchmark.metric.Metrics; -import io.deephaven.benchmark.util.Ids; -import io.deephaven.benchmark.util.Log; -import io.deephaven.benchmark.util.Numbers; -import io.deephaven.benchmark.util.Timer; +import io.deephaven.benchmark.util.*; /** * Represents the configuration of table name and columns. 
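// A minimal standalone sketch (not part of this diff; rowCount is an illustrative literal, not the
// patch's getRowCount()) of the row split that the generateLocalParquet() hunks below introduce:
// one parquet part per available processor, where every writer but the last takes the truncated
// share and the last absorbs the remainder, so the parts always sum back to the full row count.
long rowCount = 10_000_001L;
int threadCount = Runtime.getRuntime().availableProcessors();
long rowsPerThread = rowCount / threadCount;  // integer division truncates
long total = 0;
for (int i = 0; i < threadCount; i++) {
    long rows = (i < threadCount - 1) ? rowsPerThread : rowCount - rowsPerThread * i;
    total += rows;
}
assert total == rowCount;  // nothing is lost to truncation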
@@ -25,7 +24,7 @@ final public class BenchTable implements Closeable { private int durationSecs = -1; private int rowPauseMillis = -1; private String compression = null; - private Generator generator = null; + private List generators = new ArrayList<>(); private boolean isFixed = false; private String defaultDistro = null; private String[] columnGrouping = null; @@ -226,11 +225,11 @@ public boolean generateLocalParquet() { var usedExistingParquet = new AtomicBoolean(false); var tableGenParquet = new AtomicReference(""); - var hostDataDir = new AtomicReference(""); + var dhHostOsDir = new AtomicReference(""); bench.query(q).fetchAfter("used_existing_parquet_" + tableName, table -> { usedExistingParquet.set(table.getValue(0, "UsedExistingParquet").toString().equalsIgnoreCase("true")); tableGenParquet.set(table.getValue(0, "TableGenParquet").toString()); - hostDataDir.set(table.getValue(0, "HostDataDir").toString()); + dhHostOsDir.set(table.getValue(0, "DhHostOsDir").toString()); }).execute(); if (usedExistingParquet.get()) { @@ -243,11 +242,26 @@ public boolean generateLocalParquet() { if (rowPauseMillis < 0) withRowPause(0, ChronoUnit.MILLIS); - var m = bench.awaitCompletion(generateWithLocalParquet(hostDataDir.get(), tableGenParquet.get())); - Log.info("Produce Send Rate: %.2f recs/sec", m.getValue("send.rate")); - Log.info("Produce Data Duration: %d secs", timer.duration().toSeconds()); + if (dhHostOsDir.get().isEmpty()) + throw new RuntimeException("DEEPHAVEN_HOST_OS_DIR env must be set to use local parquet generation"); + + var parquetPath = (dhHostOsDir.get() + "/" + tableGenParquet.get()).replace(".parquet", ".dataset"); + var threadCount = Runtime.getRuntime().availableProcessors(); + var rowsPerThread = getRowCount() / threadCount; + var futures = new ArrayList>(threadCount); + for (int i = 0; i < threadCount; i++) { + long rows = (i < threadCount - 1) ? 
rowsPerThread : (getRowCount() - (rowsPerThread * i)); + var future = generateWithLocalParquet(parquetPath, String.format("%04d.parquet", i), i, rows); + futures.add(future); + } + futures.stream().forEach(future -> bench.awaitCompletion(future)); + close(); // Needed for the final parquet flushes bench.query(localToParquetQuery).execute(); + var durMillis = timer.duration().toMillis(); + Log.info("Produce Send Rate: %.2f recs/sec", getRowCount() / (durMillis / 1000.0)); + Log.info("Produce Data Duration: %.2f secs", durMillis / 1000.0); + Log.info("Produce Write Rate: %.2f MB/sec", Filer.getByteSize(parquetPath) * 1000.0 / durMillis / 1024 / 1024); return true; } @@ -255,37 +269,41 @@ public boolean generateLocalParquet() { * Shutdown and cleanup any running generator */ public void close() { - if (generator != null) + for (Generator generator : generators) generator.close(); + generators.clear(); } private Future generateWithAvro() { String bootstrapServer = bench.property("client.redpanda.addr", "localhost:9092"); String schemaRegistry = "http://" + bench.property("client.schema.registry.addr", "localhost:8081"); - generator = new AvroKafkaGenerator(bootstrapServer, schemaRegistry, tableName, columns, getCompression()); - return generator.produce(getRowPause(), getRowCount(), getRunDuration()); + var gen = new AvroKafkaGenerator(bootstrapServer, schemaRegistry, tableName, columns, getCompression()); + generators.add(gen); + return gen.produce(getRowPause(), getRowCount(), getRunDuration()); } private Future generateWithJson() { String bootstrapServer = bench.property("client.redpanda.addr", "localhost:9092"); String schemaRegistry = "http://" + bench.property("client.schema.registry.addr", "localhost:8081"); - generator = new JsonKafkaGenerator(bootstrapServer, schemaRegistry, tableName, columns, getCompression()); - return generator.produce(getRowPause(), getRowCount(), getRunDuration()); + var gen = new JsonKafkaGenerator(bootstrapServer, schemaRegistry, tableName, columns, getCompression()); + generators.add(gen); + return gen.produce(getRowPause(), getRowCount(), getRunDuration()); } private Future generateWithProtobuf() { String bootstrapServer = bench.property("client.redpanda.addr", "localhost:9092"); String schemaRegistry = "http://" + bench.property("client.schema.registry.addr", "localhost:8081"); - generator = new ProtobufKafkaGenerator(bootstrapServer, schemaRegistry, tableName, columns, getCompression()); - return generator.produce(getRowPause(), getRowCount(), getRunDuration()); + var gen = new ProtobufKafkaGenerator(bootstrapServer, schemaRegistry, tableName, columns, getCompression()); + generators.add(gen); + return gen.produce(getRowPause(), getRowCount(), getRunDuration()); } - private Future generateWithLocalParquet(String hostDataDir, String tableGenParquet) { - if (hostDataDir.isEmpty()) - throw new RuntimeException("HOST_DATA_DIR env must be set to use local parquet generation"); - String parquetFile = hostDataDir + "/" + tableGenParquet.replaceAll("^/data/", ""); - generator = new LocalParquetGenerator(parquetFile, tableName, columns, getCompression()); - return generator.produce(getRowPause(), getRowCount(), getRunDuration()); + private Future generateWithLocalParquet(String parquetPath, String parquetPart, long startRow, + long rowCount) { + var parquetFile = Filer.createFile(parquetPath, parquetPart).toString(); + var gen = new LocalParquetGenerator(parquetFile, tableName, columns.copy(), startRow); + generators.add(gen); + return gen.produce(getRowPause(), 
rowCount, getRunDuration()); } private int getRowPause() { @@ -393,14 +411,17 @@ with open(path) as f: usedExisting = False matching_gen_parquet = findMatchingGenParquet(table_gen_def_text) - if matching_gen_parquet is not None and os.path.exists(str(matching_gen_parquet) + '.gen.parquet'): - os.link(str(matching_gen_parquet) + '.gen.parquet', table_parquet) + if matching_gen_parquet and os.path.exists(f"{matching_gen_parquet}.gen.parquet"): + bench_api_link(str(matching_gen_parquet) + '.gen.parquet', table_parquet) + usedExisting = True + if matching_gen_parquet and os.path.exists(f"{matching_gen_parquet}.gen.dataset"): + bench_api_link(str(matching_gen_parquet) + '.gen.dataset', table_parquet) usedExisting = True used_existing_parquet_${table.name} = new_table([ string_col("UsedExistingParquet", [str(usedExisting)]), string_col("TableGenParquet", [table_gen_parquet]), - string_col("HostDataDir", [os.getenv("HOST_DATA_DIR","")]) + string_col("DhHostOsDir", [os.getenv("DEEPHAVEN_HOST_OS_DIR","")]) ]) """; @@ -434,7 +455,7 @@ with open(table_gen_def_file, 'w') as f: column_grouping=${column.grouping} if column_grouping: ${table.name} = ${table.name}.sort([${column.grouping}]) write(${table.name}, table_gen_parquet ${compression.codec} ${max.dict.keys} ${max.dict.bytes} ${target.page.bytes}) - os.link(table_gen_parquet, table_parquet) + bench_api_link(table_gen_parquet, table_parquet) del ${table.name} @@ -443,7 +464,7 @@ with open(table_gen_def_file, 'w') as f: """; static final String localToParquetQuery = """ - # Link an already created parquet file + # Link an already created parquet dataset directory import jpy, os if os.path.exists(table_parquet): @@ -452,7 +473,7 @@ with open(table_gen_def_file, 'w') as f: with open(table_gen_def_file, 'w') as f: f.write(table_gen_def_text) - os.link(table_gen_parquet, table_parquet) + bench_api_link(table_gen_parquet.replace(".parquet", ".dataset"), table_parquet) from deephaven import garbage_collect garbage_collect() diff --git a/src/main/java/io/deephaven/benchmark/api/Snippets.java b/src/main/java/io/deephaven/benchmark/api/Snippets.java index 07ad6f5e..191b0c82 100644 --- a/src/main/java/io/deephaven/benchmark/api/Snippets.java +++ b/src/main/java/io/deephaven/benchmark/api/Snippets.java @@ -186,6 +186,47 @@ def bench_api_metrics_collect(): return t """; + /** + * Make a file containing a one line reference to another file. Note: This is to get around the fact that + * Deephaven's parquet can't read from symbolic links that are directories. + *
<p>
+ * ex. bench_api_link('my_parquet_dir_or_file', 'my_link_name') + * + * @param target the parquet file or dataset directory to link + * @param link_name the link name used to retrieve the target + */ + static String bench_api_link = """ + import os, glob + def bench_api_link(target, link_name): + for f in glob.glob(link_name + '*'): + os.remove(f) + if target.endswith('.dataset'): + with open(link_name + '.link', 'w') as f: + f.write(target) + else: + os.link(target, link_name) + """; + + /** + * Read a parquet file or dataset into a Deephaven table. If the filename is a link (e.g. ".link") use the file + * reference within it. + *
<p>
+ * ex. source = bench_api_read('/data/timed.parquet') + * + * @param file_name the name of the file containing the link reference + * @return a table containing the contents of the linked parquet file or dataset + */ + static String bench_api_read = """ + import os + from deephaven.parquet import read + def bench_api_read(file_name): + link_path = file_name + '.link' + if os.path.exists(link_path): + with open(link_path, 'r') as f: + file_name = f.read().strip() + return read(file_name) + """; + /** * Returns a query containing the api functions called by the query * @@ -206,6 +247,8 @@ static String getFunctions(String query) { defs += getFunc("bench_api_metrics_add", bench_api_metrics_add, query, defs); defs += getFunc("bench_api_metrics_collect", bench_api_metrics_collect, query, defs); defs += getFunc("bench_api_await_column_value_limit", bench_api_await_column_value_limit, query, defs); + defs += getFunc("bench_api_link", bench_api_link, query, defs); + defs += getFunc("bench_api_read", bench_api_read, query, defs); return defs; } diff --git a/src/main/java/io/deephaven/benchmark/generator/ColumnDefs.java b/src/main/java/io/deephaven/benchmark/generator/ColumnDefs.java index 70433815..8d57292f 100644 --- a/src/main/java/io/deephaven/benchmark/generator/ColumnDefs.java +++ b/src/main/java/io/deephaven/benchmark/generator/ColumnDefs.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2022-2023 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.generator; import java.util.*; @@ -18,10 +18,10 @@ final public class ColumnDefs { private String defaultDistribution = "random"; /** - * Initialize the instance with a default cache size of 1024 + * Initialize the instance with a default cache size large enough to cover typical column value ranges */ public ColumnDefs() { - this(1024); + this(2_000_000); } ColumnDefs(int valueCacheSize) { @@ -105,6 +105,21 @@ public ColumnDefs add(String name, String type, String valueDef) { return add(name, type, valueDef, null); } + /** + * Create an independent copy of this column definitions instance. Each copy has its own Maker objects, value + * caches, and distribution functions, making it safe to use from a separate thread without contention. + * + * @return a new independent ColumnDefs with the same column definitions + */ + public ColumnDefs copy() { + var c = new ColumnDefs(valueCacheSize); + c.defaultDistribution = defaultDistribution; + for (ColumnDef col : columns) { + c.add(col.name(), col.type(), col.valueDef(), col.maker().distributionName); + } + return c; + } + /** * Get the next value for the column in the given index according to the columns defined distribution. 
* diff --git a/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java index d57b70d2..5a4b4793 100644 --- a/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java +++ b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java @@ -1,17 +1,17 @@ /* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.generator; -import java.nio.file.Path; -import java.nio.file.Paths; +import java.io.File; +import java.io.IOException; import java.util.*; import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicBoolean; +import blue.strategic.parquet.*; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.MessageTypeParser; import io.deephaven.benchmark.metric.Metrics; import io.deephaven.benchmark.util.Log; import io.deephaven.benchmark.util.Threads; -import blue.strategic.parquet.*; /** * Generator that produces rows to a local Parquet file according to the provided column definitions. Note: This @@ -21,80 +21,77 @@ */ public class LocalParquetGenerator implements Generator { final private ExecutorService queue = Threads.single("LocalParquetGenerator"); - final private Path parquetOut; - final private ParquetWriter writer; final private ColumnDefs columnDefs; final private String topic; + final private long startSeed; final private MessageType schema; + final private File parquetFile; final private AtomicBoolean isClosed = new AtomicBoolean(false); + private ParquetWriter writer; /** * Create a local Parquet generator with the provided column definitions and output file. The column definitions * determine the schema of the Parquet file and the data generated for each column. - * + * * @param parquetFile output Parquet file path * @param topic topic name (used for logging and schema generation) * @param columnDefs column definitions that determine the schema and generated data - * @param compression compression type for Parquet file (e.g. "SNAPPY", "GZIP", "UNCOMPRESSED") + * @param startSeed starting seed for data generation */ - public LocalParquetGenerator(String parquetFile, String topic, ColumnDefs columnDefs, String compression) { + public LocalParquetGenerator(String parquetFile, String topic, ColumnDefs columnDefs, long startSeed) { this.topic = topic; this.columnDefs = columnDefs; + this.startSeed = startSeed; + this.parquetFile = new File(parquetFile); this.schema = MessageTypeParser.parseMessageType(getSchemaMessage(topic, columnDefs)); - this.parquetOut = Paths.get(parquetFile); - this.writer = createParquetWriter(schema, parquetOut); + try { + this.writer = ParquetWriter.writeFile(schema, this.parquetFile, createDehydrator()); + } catch (IOException ex) { + throw new RuntimeException("Failed to create Parquet writer for topic: " + topic, ex); + } } /** - * Produce a maximum number of records to a Kafka topic asynchronously. - * + * Produce a maximum number of records asynchronously. 
+ * * @param perRecordPauseMillis wait time between each record sent * @param maxRecordCount maximum records to produce - * @param maxDurationSecs maximum duration to produce (May prevent maximum records from being produces) + * @param maxDurationSecs maximum duration to produce */ public Future produce(int perRecordPauseMillis, long maxRecordCount, int maxDurationSecs) { checkClosed(); - var r = new Callable() { - @Override - public Metrics call() { - final long maxDuration = maxDurationSecs * 1000; - final long beginTime = System.currentTimeMillis(); - final int columnDefsCount = columnDefs.getCount(); - final var rec = new Row(schema, new ArrayList<>(columnDefs.getCount())); - long recCount = 0; - long duration = 0; - boolean isDone = false; - while (!isClosed.get() && !isDone) { - try { - if (recCount >= maxRecordCount) { - isDone = true; - continue; - } - // Build a record with the column defs for Parquet row write - for (int i = 0, n = columnDefsCount; i < n; i++) { - var v = columnDefs.nextValue(i, recCount, maxRecordCount); - rec.addValue(v); - } - // Write the record to Parquet file - writer.write(rec); - rec.clear(); - - if (++recCount % 10_000_000 == 0) - Log.info("Produced %s records to topic '%s'", recCount, topic); - duration = System.currentTimeMillis() - beginTime; - if (duration > maxDuration) - isDone = true; - } catch (Exception ex) { - throw new RuntimeException("Failed to write to topic: " + topic, ex); - } + return queue.submit(() -> { + final long maxDuration = maxDurationSecs * 1000L; + final long beginTime = System.currentTimeMillis(); + final int colCount = columnDefs.getCount(); + + long recCount = startSeed; + long totalWritten = 0; + long duration = 0; + Object[] row = new Object[colCount]; + + while (!isClosed.get() && recCount < maxRecordCount) { + for (int i = 0; i < colCount; i++) { + row[i] = columnDefs.nextValue(i, recCount, maxRecordCount); } - Log.info("Produced %s records to topic: %s", recCount, topic); - var metrics = new Metrics("test-runner", "generate." + topic).set("duration.secs", duration / 1000.0) - .set("record.count", recCount).set("send.rate", recCount / (duration / 1000.0)); - return metrics; + writer.write(row); + recCount++; + + if (++totalWritten % 10_000_000 == 0) + Log.info("Produced %s records to topic '%s'", totalWritten, topic); + + duration = System.currentTimeMillis() - beginTime; + if (duration > maxDuration) + break; } - }; - return queue.submit(r); + + Log.info("Produced %s records to topic: %s", totalWritten, topic); + duration = System.currentTimeMillis() - beginTime; + return new Metrics("test-runner", "generate." 
+ topic) + .set("duration.secs", duration / 1000.0) + .set("record.count", totalWritten) + .set("send.rate", totalWritten / (duration / 1000.0)); + }); } /** @@ -117,15 +114,13 @@ private void checkClosed() { throw new RuntimeException("Generator is closed"); } - private ParquetWriter createParquetWriter(MessageType schema, Path parquetOut) { - try { - Dehydrator dehydrator = (row, valueWriter) -> { - row.write(valueWriter); - }; - return ParquetWriter.writeFile(schema, parquetOut.toFile(), dehydrator); - } catch (Exception ex) { - throw new RuntimeException("Failed to create Parquet writer for topic: " + topic, ex); - } + private Dehydrator createDehydrator() { + final String[] colNames = columnDefs.toTypeMap().keySet().toArray(new String[0]); + return (row, valueWriter) -> { + for (int i = 0; i < colNames.length; i++) { + valueWriter.write(colNames[i], row[i]); + } + }; } private String getSchemaMessage(String topic, ColumnDefs fieldDefs) { @@ -164,20 +159,4 @@ private String getCharEncoding(String type) { }; } - record Row(MessageType schema, List values) { - public void addValue(Object value) { - values.add(value); - } - - public void write(ValueWriter valueWriter) { - for (int i = 0, n = values.size(); i < n; i++) { - valueWriter.write(schema.getFieldName(i), values.get(i)); - } - } - - public void clear() { - values.clear(); - } - } - } diff --git a/src/main/java/io/deephaven/benchmark/util/Filer.java b/src/main/java/io/deephaven/benchmark/util/Filer.java index b8d5bb4d..40c55583 100644 --- a/src/main/java/io/deephaven/benchmark/util/Filer.java +++ b/src/main/java/io/deephaven/benchmark/util/Filer.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2022-2023 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.util; import static java.nio.file.StandardOpenOption.*; @@ -55,6 +55,21 @@ static public Path createFile(String parentDir, String fileName) { } } + /** + * Get the size of a file or directory in bytes. Directory sizes are calculated recursively by summing the sizes of + * all regular files contained within. 
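+ * <p>
+ * ex. long size = Filer.getByteSize("/data/timed.dataset") (illustrative usage; the path is hypothetical)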
+ * + * @param path the file or directory to get the size of + * @return the size of the file or directory in bytes + */ + static public long getByteSize(String path) { + try { + return Files.walk(Paths.get(path)).filter(Files::isRegularFile).mapToLong(f -> f.toFile().length()).sum(); + } catch (Exception ex) { + throw new RuntimeException("Failed to get size of file: " + path, ex); + } + } + /** * Read the text of a file while preserving newlines and getting rid of carriage returns * diff --git a/src/test/java/io/deephaven/benchmark/util/FilerTest.java b/src/test/java/io/deephaven/benchmark/util/FilerTest.java index 773cd46a..53442e79 100644 --- a/src/test/java/io/deephaven/benchmark/util/FilerTest.java +++ b/src/test/java/io/deephaven/benchmark/util/FilerTest.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2022-2023 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.util; import static org.junit.jupiter.api.Assertions.*; @@ -48,4 +48,11 @@ public void getUrlText() throws Exception { assertEquals("One Two Three\nFour Five Six", Filer.getURLText(url), "Wrong file text"); } + @Test + public void getByteSize() throws Exception { + var p = Paths.get(getClass().getResource("filerfolder").toURI()).toFile().toString(); + assertEquals(46, Filer.getByteSize(p), "Wrong byte size"); + p = Paths.get(getClass().getResource("filertest.txt").toURI()).toFile().toString(); + assertEquals(27, Filer.getByteSize(p), "Wrong byte size"); + } } diff --git a/src/test/resources/io/deephaven/benchmark/util/filerfolder/filer1.txt b/src/test/resources/io/deephaven/benchmark/util/filerfolder/filer1.txt new file mode 100644 index 00000000..9f40cf63 --- /dev/null +++ b/src/test/resources/io/deephaven/benchmark/util/filerfolder/filer1.txt @@ -0,0 +1 @@ +This file has bytes \ No newline at end of file diff --git a/src/test/resources/io/deephaven/benchmark/util/filerfolder/folder1/filer2.txt b/src/test/resources/io/deephaven/benchmark/util/filerfolder/folder1/filer2.txt new file mode 100644 index 00000000..58da37b3 --- /dev/null +++ b/src/test/resources/io/deephaven/benchmark/util/filerfolder/folder1/filer2.txt @@ -0,0 +1 @@ +This file has bytes as well \ No newline at end of file From ff4d891151e01a7d06dcaf0a87206648b7d74457 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Wed, 1 Apr 2026 17:49:49 -0600 Subject: [PATCH 10/25] Added 1st pass at benchmark event retrieval with JFR --- .../tests/compare/CompareTestRunner.java | 3 + .../tests/standard/StandardTestRunner.java | 29 ++++++- .../tests/standard/file/FileTestRunner.java | 3 + .../tests/standard/kafka/KafkaTestRunner.java | 3 + .../benchmark/tests/train/AggByTrainTest.java | 1 + .../tests/train/TrainTestRunner.java | 61 +++++++++++++- .../io/deephaven/benchmark/api/Bench.java | 48 +++++++---- .../deephaven/benchmark/api/BenchEvents.java | 84 +++++++++++++++++++ .../io/deephaven/benchmark/api/BenchLog.java | 2 +- .../io/deephaven/benchmark/api/QueryLog.java | 18 +--- .../io/deephaven/benchmark/api/Snippets.java | 9 +- .../generator/LocalParquetGenerator.java | 2 +- .../io/deephaven/benchmark/util/Filer.java | 21 ++++- .../deephaven/benchmark/api/QueryLogTest.java | 16 ++-- 14 files changed, 248 insertions(+), 52 deletions(-) create mode 100644 src/main/java/io/deephaven/benchmark/api/BenchEvents.java diff --git a/src/it/java/io/deephaven/benchmark/tests/compare/CompareTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/compare/CompareTestRunner.java index 755633f7..b7b31fe4 100644 
--- a/src/it/java/io/deephaven/benchmark/tests/compare/CompareTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/compare/CompareTestRunner.java @@ -28,6 +28,9 @@ * practical purposes, though it is not ideal. */ public class CompareTestRunner { + static { + System.setProperty("root.test.package", "io.deephaven.benchmark.tests"); + } final Object testInst; final Set requiredPackages = new LinkedHashSet<>(); final Map downloadFiles = new LinkedHashMap<>(); diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index 065482f1..9d4b23ae 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -21,10 +21,14 @@ * conventions are followed (ex. main file is "source") */ final public class StandardTestRunner { + static { + System.setProperty("root.test.package", "io.deephaven.benchmark.tests"); + } final Object testInst; final List supportTables = new ArrayList<>(); final List setupQueries = new ArrayList<>(); final List preOpQueries = new ArrayList<>(); + final List teardownQueries = new ArrayList<>(); final Set requiredServices = new TreeSet<>(List.of("deephaven")); private String mainTable = "source"; private Bench api; @@ -138,6 +142,16 @@ public void addPreOpQuery(String query) { preOpQueries.add(query); } + /** + * Add a query to be run after everything else is done. This is useful for tearing down resources after the test + * runs, such as logging, temporary files, or perf table retrieval. + * + * @param query the query to run after the measured operation + */ + public void addTeardownQuery(String query) { + teardownQueries.add(query); + } + /** * The {@code scale.row.count} property supplies a default for the number of rows generated for benchmark tests. * Given that some operations use less memory than others, scaling up the generated rows per operation is more @@ -264,9 +278,11 @@ String getStaticQuery(String name, String operation, long rowCount, String... lo bench_api_metrics_start() print('${logOperationBegin}') + begin_clock = time.time_ns() begin_time = time.perf_counter_ns() result = ${operation} end_time = time.perf_counter_ns() + end_clock = time.time_ns() print('${logOperationEnd}') bench_api_metrics_end() @@ -276,7 +292,10 @@ String getStaticQuery(String name, String operation, long rowCount, String... lo double_col("elapsed_nanos", [end_time - begin_time]), long_col("processed_row_count", [loaded_tbl_size]), long_col("result_row_count", [result.size]), + long_col("begin_clock_nanos", [begin_clock]), + long_col("end_clock_nanos", [end_clock]), ]) + ${teardownQueries} """; var read = getReadOperation(staticFactor, rowCount, loadColumns); return populateQuery(name, staticQuery, operation, read, loadColumns); } @@ -301,6 +320,7 @@ String getIncQuery(String name, String operation, long rowCount, String... 
loadC stats = new_table([ double_col("elapsed_nanos", [end_time - begin_time]), long_col("processed_row_count", [loaded_tbl_size]), - long_col("result_row_count", [result.size]) + long_col("result_row_count", [result.size]), + long_col("begin_clock_nanos", [begin_clock]), + long_col("end_clock_nanos", [end_clock]), ]) + ${teardownQueries} """; var read = getReadOperation(incFactor, rowCount, loadColumns); return populateQuery(name, incQuery, operation, read, loadColumns); @@ -336,6 +360,7 @@ String populateQuery(String name, String query, String operation, String read, S query = query.replace("${setupQueries}", String.join("\n", setupQueries)); query = query.replace("${preOpQueries}", String.join("\n", preOpQueries)); query = query.replace("${operation}", operation); + query = query.replace("${teardownQueries}", String.join("\n", teardownQueries)); query = query.replace("${logOperationBegin}", getLogSnippet("Begin", name)); query = query.replace("${logOperationEnd}", getLogSnippet("End", name)); return query; @@ -365,6 +390,8 @@ Result runTest(String name, String warmupQuery, String mainQuery) { metrics.set("inc.factor", incFactor); metrics.set("row.factor", rowCountFactor); api.metrics().add(metrics); + }).fetchAfter("standard_events", table -> { + api.events().add(table); }).execute(); api.result().test("deephaven-engine", result.get().elapsedTime(), result.get().loadedRowCount()); return result.get(); diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/file/FileTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/file/FileTestRunner.java index 5fadc8ba..4126feac 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/file/FileTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/file/FileTestRunner.java @@ -14,6 +14,9 @@ * Test reading and writing parquet files with various data types and compression codecs. */ class FileTestRunner { + static { + System.setProperty("root.test.package", "io.deephaven.benchmark.tests"); + } final String parquetCfg = "max_dictionary_keys=1048576, max_dictionary_size=1048576, target_page_size=65536"; final Object testInst; final Set requiredServices = new TreeSet<>(List.of("deephaven")); diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/kafka/KafkaTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/kafka/KafkaTestRunner.java index d65014d8..c919f852 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/kafka/KafkaTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/kafka/KafkaTestRunner.java @@ -19,6 +19,9 @@ * append/blink table types. Results are checked to ensure the correct number of rows has been processed. 
*/ class KafkaTestRunner { + static { + System.setProperty("root.test.package", "io.deephaven.benchmark.tests"); + } final Object testInst; final Bench api; final Controller controller; diff --git a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java index 1095058d..22162021 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java @@ -25,6 +25,7 @@ void setup(double rowFactor) { } @Test + @Disabled void aggBy0Groups() { setup(120); var q = "timed.agg_by(aggs)"; diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index 593640c3..054112f6 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -1,7 +1,6 @@ /* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.tests.train; -import java.util.Arrays; import io.deephaven.benchmark.tests.standard.StandardTestRunner; /** @@ -14,7 +13,7 @@ * versions and GC types. */ final public class TrainTestRunner { - static final int maxRowFactor = 500; + static final int maxRowFactor = 400; final StandardTestRunner delegate; final long baseRowCount; @@ -39,7 +38,63 @@ public void addSetupQuery(String query) { } public void test(String name, long maxExpectedRowCount, String operation, String... loadColumns) { + delegate.addSetupQuery(startJfrQuery); + delegate.addTeardownQuery(stopJfrQuery); + delegate.addTeardownQuery(ugpQuery); delegate.test(name, maxExpectedRowCount, operation, loadColumns); } + + static final String startJfrQuery = """ + import jpy + Recording = jpy.get_type("jdk.jfr.Recording") + rec = Recording() + rec.setName("benchmark") + rec.start() + """; + + static final String stopJfrQuery = """ + Paths = jpy.get_type("java.nio.file.Paths") + RecordingFile = jpy.get_type("jdk.jfr.consumer.RecordingFile") + rec.dump(Paths.get("/data/benchmark.jfr")) + rec.stop() + rec.close() + events = RecordingFile.readAllEvents(Paths.get("/data/benchmark.jfr")) -} + # Log each event's fields to the console for inspection + print("=== JFR event dump begin ===") + for i in range(events.size()): + e = events.get(i) + etype = e.getEventType() + print(f"Event {i}: type={etype.getName()}") + fields = e.getFields() + for idx in range(fields.size()): + fd = fields.get(idx) + fname = fd.getName() + fval = e.getValue(fname) + print(f" {fname} = {fval}") + print("--") + print("=== JFR event dump end ===") + + jfr_rows = [] + for i in range(events.size()): + e = events.get(i) + start = e.getStartTime().getEpochSecond() * 1000000000 + e.getStartTime().getNano() + dur = e.getDuration().getSeconds() * 1000000000 + e.getDuration().getNano() + jfr_rows.append([str(e.getEventType().getName()), start, dur, str(e)]) + jfr = new_table([ + string_col("origin", ["jfr" for r in jfr_rows]), + string_col("type", [r[0] for r in jfr_rows]), + long_col("start_ns", [r[1] for r in jfr_rows]), + long_col("duration_ns", [r[2] for r in jfr_rows]), + string_col("detail", [r[3] for r in jfr_rows]), + ]) + standard_events = merge([standard_events, jfr]) + """; + + static final String ugpQuery = """ + from deephaven import write_csv + import deephaven.perfmon as pm + ugp = pm.update_performance_log() + write_csv(ugp, "/data/ugp_cycles.csv") + """; +} \ No newline at end of file 
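For readers less familiar with jpy, the startJfrQuery/stopJfrQuery pair above is the Python face of a plain JDK Flight Recorder round trip. A minimal Java sketch of the same flow (the class name and output path are illustrative; the queries dump to /data/benchmark.jfr inside the engine) looks like this, with the nanosecond math matching the jfr_rows loop:

import java.nio.file.Path;
import java.nio.file.Paths;
import jdk.jfr.Recording;
import jdk.jfr.consumer.RecordedEvent;
import jdk.jfr.consumer.RecordingFile;

public class JfrRoundTripSketch {
    public static void main(String[] args) throws Exception {
        try (Recording rec = new Recording()) {
            rec.setName("benchmark");
            rec.start();
            // ... the measured benchmark operation would run here ...
            Path out = Paths.get("/tmp/benchmark.jfr");  // illustrative path
            rec.dump(out);                               // dump while still recording, as stopJfrQuery does
            rec.stop();
            for (RecordedEvent e : RecordingFile.readAllEvents(out)) {
                // convert each event's start time and duration to nanoseconds
                long startNs = e.getStartTime().getEpochSecond() * 1_000_000_000L + e.getStartTime().getNano();
                long durNs = e.getDuration().getSeconds() * 1_000_000_000L + e.getDuration().getNano();
                System.out.println(e.getEventType().getName() + " start=" + startNs + " duration=" + durNs);
            }
        }
    }
}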
diff --git a/src/main/java/io/deephaven/benchmark/api/Bench.java b/src/main/java/io/deephaven/benchmark/api/Bench.java index f6225086..1d3e5ab6 100644 --- a/src/main/java/io/deephaven/benchmark/api/Bench.java +++ b/src/main/java/io/deephaven/benchmark/api/Bench.java @@ -1,8 +1,7 @@ -/* Copyright (c) 2022-2025 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.api; import java.io.Closeable; -import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.time.Duration; @@ -11,9 +10,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import io.deephaven.benchmark.metric.Metrics; -import io.deephaven.benchmark.util.Filer; -import io.deephaven.benchmark.util.Ids; -import io.deephaven.benchmark.util.Timer; +import io.deephaven.benchmark.util.*; /** * The root accessor class for the API. Use Bench.create(this) in a typical JUnit test to start things off @@ -36,6 +33,10 @@ final public class Bench { * The name of the benchmark metrics csv file */ static final public String metricsFileName = "benchmark-metrics.csv"; + /** + * The name of the benchmark events csv file + */ + static final public String eventsFileName = "benchmark-events.csv"; /** * The name of the benchmark platform csv file */ @@ -62,9 +63,9 @@ static public Bench create(Object testInst) { return v; } - final Object testInst; final BenchResult result; final BenchMetrics metrics; + final BenchEvents events; final BenchPlatform platform; final QueryLog queryLog; final BenchLog runLog; @@ -73,13 +74,13 @@ static public Bench create(Object testInst) { final Session session = new Session(); private boolean isClosed = false; - Bench(Class testInst) { - this.testInst = testInst; + Bench(Class testClass) { this.result = new BenchResult(outputDir); this.metrics = new BenchMetrics(outputDir); + this.events = new BenchEvents(outputDir); this.platform = new BenchPlatform(this, outputDir); - this.queryLog = new QueryLog(outputDir, testInst); - this.runLog = new BenchLog(outputDir, testInst); + this.queryLog = new QueryLog(getLogDir(testClass), testClass); + this.runLog = new BenchLog(getLogDir(testClass), testClass); } /** @@ -92,6 +93,7 @@ public void setName(String name) { throw new RuntimeException("No blank Benchmark names allowed"); this.result.setName(name); this.metrics.setName(name); + this.events.setName(name); this.queryLog.setName(name); this.runLog.setName(name); } @@ -200,6 +202,15 @@ public BenchMetrics metrics() { return metrics; } + /** + * Get the events for this Benchmark instance (e.g. test) used for collecting event values + * + * @return the events instance + */ + public BenchEvents events() { + return events; + } + /** * Get the platform for this Benchmark instance (e.g. test) used for collecting platform properties * @@ -210,9 +221,9 @@ public BenchPlatform platform() { } /** - * Get the metrics for this Benchmark instance (e.g. test) used for collecting metric values + * Get the query log for this Benchmark instance (e.g. 
test) used for recording queries * - * @return the metrics instance + * @return the query log instance */ public BenchLog log() { return runLog; @@ -244,6 +255,7 @@ public void close() { closeables.clear(); result.commit(); metrics.commit(); + events.commit(); platform.commit(); runLog.close(); queryLog.close(); @@ -271,6 +283,12 @@ > T addFuture(T future) { return future; } + static private Path getLogDir(Class testClass) { + var pkgRoot = profile.property("root.test.package", Bench.class.getPackageName().replaceAll("[.][^.]+$", "")); + var name = testClass.getPackageName().replaceAll(pkgRoot + '.', "") + '.' + testClass.getSimpleName(); + return Filer.createDirectory(outputDir.resolve("test-logs").resolve(name).toString()); + } + static private Path initializeOutputDirectory() { setSystemProperties(); boolean isTimestamped = profile.propertyAsBoolean("timestamp.test.results", "false"); @@ -278,11 +296,7 @@ static private Path initializeOutputDirectory() { if (isTimestamped) dir = dir.resolve(Ids.runId()); Filer.delete(dir); - try { - return Files.createDirectories(dir); - } catch (Exception ex) { - throw new RuntimeException("Failed initialize benchmark result directory: " + dir, ex); - } + return Filer.createDirectory(dir.toString()); } static private void setSystemProperties() { diff --git a/src/main/java/io/deephaven/benchmark/api/BenchEvents.java b/src/main/java/io/deephaven/benchmark/api/BenchEvents.java new file mode 100644 index 00000000..f3d715f0 --- /dev/null +++ b/src/main/java/io/deephaven/benchmark/api/BenchEvents.java @@ -0,0 +1,84 @@ +/* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */ +package io.deephaven.benchmark.api; + +import java.io.BufferedWriter; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.*; +import io.deephaven.benchmark.connect.ResultTable; + +/** + * Represents the events gathered during usage of the Bench API. These can include events gather by the API or the user. + */ +final public class BenchEvents { + static final String header = "benchmark_name,origin,type,start,duration,detail"; + final List events = new ArrayList<>(); + final Path file; + private String name = null; + + BenchEvents(Path parent) { + this(parent, Bench.eventsFileName); + } + + BenchEvents(Path parent, String resultFileName) { + this.file = parent.resolve(resultFileName); + } + + /** + * Add the results from a table as events to persist to the file system. This table must have columns defined as + * origin, type, start_ns, duration_ns, detail + * + * @param table a table containing events + * @return this instance + */ + public BenchEvents add(ResultTable table) { + for (int r = 0, rn = table.getRowCount(); r < rn; r++) { + var origin = table.getValue(r, "origin").toString(); + var type = table.getValue(r, "type").toString(); + var startNanos = table.getNumber(r, "start_ns").longValue(); + var durationNanos = table.getNumber(r, "duration_ns").longValue(); + var details = table.getValue(r, "detail").toString(); + var event = new Event(origin, type, startNanos, durationNanos, details); + events.add(event); + } + return this; + } + + /** + * Save the collected events to a csv file. 
+ */ + public void commit() { + if (!hasHeader()) + writeLine(header, file); + + for (Event event : events) { + var line = name + ',' + event.toCsv(); + writeLine(line, file); + } + } + + void setName(String name) { + this.name = name; + } + + private boolean hasHeader() { + return Files.exists(file); + } + + static void writeLine(String line, Path file) { + try (BufferedWriter out = Files.newBufferedWriter(file, StandardOpenOption.CREATE, StandardOpenOption.APPEND)) { + out.write(line); + out.newLine(); + } catch (Exception ex) { + throw new RuntimeException("Failed to write result to file: " + file, ex); + } + } + + record Event(String origin, String type, long startNanos, long durationNanos, String detail) { + String toCsv() { + return origin + "," + type + "," + startNanos + "," + durationNanos + "," + detail; + } + } + +} \ No newline at end of file diff --git a/src/main/java/io/deephaven/benchmark/api/BenchLog.java b/src/main/java/io/deephaven/benchmark/api/BenchLog.java index 3f4de79b..68416595 100644 --- a/src/main/java/io/deephaven/benchmark/api/BenchLog.java +++ b/src/main/java/io/deephaven/benchmark/api/BenchLog.java @@ -26,7 +26,7 @@ final public class BenchLog { BenchLog(Path parent, Class testClass) { this.testClass = testClass; this.parent = parent; - this.logFile = getLogFile(parent, testClass); + this.logFile = parent.resolve("engine.log"); } /** diff --git a/src/main/java/io/deephaven/benchmark/api/QueryLog.java b/src/main/java/io/deephaven/benchmark/api/QueryLog.java index b14b615e..bf307893 100644 --- a/src/main/java/io/deephaven/benchmark/api/QueryLog.java +++ b/src/main/java/io/deephaven/benchmark/api/QueryLog.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2022-2023 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.api; import static java.nio.file.StandardOpenOption.*; @@ -30,7 +30,7 @@ class QueryLog implements Closeable { QueryLog(Path parent, Class testClass) { this.testClass = testClass; this.parent = parent; - this.logFile = getLogFile(parent, testClass); + this.logFile = parent.resolve("query.md"); } /** @@ -51,9 +51,9 @@ public void close() { write("## " + label + " - " + name, 2); for (int i = 0, n = queries.size(); i < n; i++) { write("### Query " + (i + 1), 1); - write("````", 1); + write("```", 1); write(queries.get(i), 0); - write("````", 2); + write("```", 2); } } @@ -93,14 +93,4 @@ private void write(String text, int newLineCount) { } } - static Path getLogFile(Path parent, Class testClass) { - Path logFile = parent.resolve("test-logs/" + testClass.getName() + ".query.md"); - try { - Files.createDirectories(logFile.getParent()); - return logFile; - } catch (Exception ex) { - throw new RuntimeException("Failed to create query log directory" + logFile.getParent(), ex); - } - } - } diff --git a/src/main/java/io/deephaven/benchmark/api/Snippets.java b/src/main/java/io/deephaven/benchmark/api/Snippets.java index 191b0c82..86bded74 100644 --- a/src/main/java/io/deephaven/benchmark/api/Snippets.java +++ b/src/main/java/io/deephaven/benchmark/api/Snippets.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2022-2024 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.api; /** @@ -75,7 +75,7 @@ with exclusive_lock(table): /** * Initialize the container for storing benchmark metrics. Define functions for getting some MX Bean data for gc, - * jit and heap + * jit and heap. *

* ex. bench_api_metrics_init() */ @@ -184,6 +184,9 @@ def bench_api_metrics_collect(): 'name=``+m[3]','value=``+m[4]','note=``+m[5]']) t.add(m1) return t + + standard_events = new_table([ string_col("origin",[]), string_col("type",[]), long_col("start_ns",[]), + long_col("duration_ns",[]), string_col("detail",[])]) """; /** @@ -258,4 +261,4 @@ static String getFunc(String functionName, String functionDef, String query, Str return functionDef + System.lineSeparator(); } -} +} \ No newline at end of file diff --git a/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java index 5a4b4793..692e6fd8 100644 --- a/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java +++ b/src/main/java/io/deephaven/benchmark/generator/LocalParquetGenerator.java @@ -78,7 +78,7 @@ public Future produce(int perRecordPauseMillis, long maxRecordCount, in recCount++; if (++totalWritten % 10_000_000 == 0) - Log.info("Produced %s records to topic '%s'", totalWritten, topic); + Log.info("Produced %s records to topic: %s", totalWritten, topic); duration = System.currentTimeMillis() - beginTime; if (duration > maxDuration) diff --git a/src/main/java/io/deephaven/benchmark/util/Filer.java b/src/main/java/io/deephaven/benchmark/util/Filer.java index 40c55583..30010640 100644 --- a/src/main/java/io/deephaven/benchmark/util/Filer.java +++ b/src/main/java/io/deephaven/benchmark/util/Filer.java @@ -46,12 +46,25 @@ static public void delete(Path path) { */ static public Path createFile(String parentDir, String fileName) { try { - var d = Files.createDirectories(Paths.get(parentDir), PosixFilePermissions.asFileAttribute( - PosixFilePermissions.fromString("rwxr-xr-x"))); - return Files.createFile(d.resolve(fileName), + return Files.createFile(createDirectory(parentDir).resolve(fileName), PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--"))); } catch (Exception ex) { - throw new RuntimeException("Failed to create temp file: " + fileName, ex); + throw new RuntimeException("Failed to create file: " + fileName, ex); + } + } + + /** + * Create a directory with the given name. Create parent directories if they do not exist. Permissions are 755. 
+ * + * @param dir the directory to create + * @return the path of the created directories + */ + static public Path createDirectory(String dir) { + try { + return Files.createDirectories(Paths.get(dir), PosixFilePermissions.asFileAttribute( + PosixFilePermissions.fromString("rwxr-xr-x"))); + } catch (Exception ex) { + throw new RuntimeException("Failed to create temp directory: " + dir, ex); } } diff --git a/src/test/java/io/deephaven/benchmark/api/QueryLogTest.java b/src/test/java/io/deephaven/benchmark/api/QueryLogTest.java index 0d5169b6..14cc37d7 100644 --- a/src/test/java/io/deephaven/benchmark/api/QueryLogTest.java +++ b/src/test/java/io/deephaven/benchmark/api/QueryLogTest.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2022-2023 Deephaven Data Labs and Patent Pending */ +/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.api; import static org.junit.jupiter.api.Assertions.*; @@ -11,7 +11,7 @@ public class QueryLogTest { @Test public void logQuery() throws Exception { Path outParent = Paths.get(getClass().getResource("test-profile.properties").toURI()).getParent(); - Files.deleteIfExists(QueryLog.getLogFile(outParent, QueryLogTest.class)); + Files.deleteIfExists(outParent.resolve("query.md")); var qlog = new QueryLog(outParent, QueryLogTest.class); qlog.setName(getClass().getSimpleName()); @@ -37,21 +37,21 @@ public void logQuery() throws Exception { ## Test - 1st Test ### Query 1 - ```` + ``` setup test - ```` + ``` ### Query 2 - ```` + ``` query1 query line - ```` + ``` ### Query 3 - ```` + ``` query2 query line - ```` + ``` """.replace("\r", "").trim(); var text = Filer.getFileText(qlog.logFile); From 25629cc1d0e9c2b64dfa0902b8397ffa5290e72f Mon Sep 17 00:00:00 2001 From: stanbrub Date: Tue, 7 Apr 2026 15:03:30 -0600 Subject: [PATCH 11/25] Added jfr events --- .../tests/train/TrainTestRunner.java | 96 ++++++++++++++----- .../deephaven/benchmark/api/BenchEvents.java | 13 +-- .../io/deephaven/benchmark/api/Snippets.java | 16 +++- .../benchmark/connect/BarrageConnector.java | 6 +- 4 files changed, 94 insertions(+), 37 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index 054112f6..d56349a4 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -49,46 +49,90 @@ public void test(String name, long maxExpectedRowCount, String operation, String Recording = jpy.get_type("jdk.jfr.Recording") rec = Recording() rec.setName("benchmark") + + enabled_events=['jdk.GarbageCollection', 'jdk.GCPhasePause', 'jdk.GCPhaseConcurrent', 'jdk.GCCPUTime'] + for n in enabled_events: + try: + rec.enable(n) + except Exception as e: + print(f"Event Not Enabled: {e}") + + disabled_events=['jdk.ExecutionSample', 'jdk.JavaMonitorEnter', 'jdk.JavaMonitorWait', 'jdk.ThreadSleep', + 'jdk.SocketRead', 'jdk.SocketWrite'] + for n in disabled_events: + try: + rec.disable(_ename) + except Exception: + print(f"Event Not Disabled: {e}") + rec.start() """; static final String stopJfrQuery = """ Paths = jpy.get_type("java.nio.file.Paths") RecordingFile = jpy.get_type("jdk.jfr.consumer.RecordingFile") + rec.dump(Paths.get("/data/benchmark.jfr")) rec.stop() rec.close() + events = RecordingFile.readAllEvents(Paths.get("/data/benchmark.jfr")) - # Log each event's fields to the console for inspection - print("=== JFR event dump begin ===") - for i in range(events.size()): - 
e = events.get(i) - etype = e.getEventType() - print(f"Event {i}: type={etype.getName()}") - fields = e.getFields() - for idx in range(fields.size()): - fd = fields.get(idx) - fname = fd.getName() - fval = e.getValue(fname) - print(f" {fname} = {fval}") - print("--") - print("=== JFR event dump end ===") - jfr_rows = [] + + def getEventValue(ev, field): + try: + return ev.getValue(field) + except Exception: + return None + + def getNanoValue(ev, duration_field): + val = ev.getValue(duration_field) + if val is None or str(val) == "null": return 0 + if isinstance(val, int): return val + if hasattr(val, "size") and hasattr(val, "get"): + total = 0 + for i in range(val.size()): + d = val.get(i) + if d is not None and str(d) != "null": total += d.toNanos() + return total + if hasattr(val, "toNanos"): return val.toNanos() + raise TypeError(f"Unsupported JFR value type: {type(val)}") + + for i in range(events.size()): e = events.get(i) - start = e.getStartTime().getEpochSecond() * 1000000000 + e.getStartTime().getNano() - dur = e.getDuration().getSeconds() * 1000000000 + e.getDuration().getNano() - jfr_rows.append([str(e.getEventType().getName()), start, dur, str(e)]) - jfr = new_table([ - string_col("origin", ["jfr" for r in jfr_rows]), - string_col("type", [r[0] for r in jfr_rows]), - long_col("start_ns", [r[1] for r in jfr_rows]), - long_col("duration_ns", [r[2] for r in jfr_rows]), - string_col("detail", [r[3] for r in jfr_rows]), - ]) - standard_events = merge([standard_events, jfr]) + etype = e.getEventType().getName() + start = e.getStartTime().getEpochSecond() * 1000000000 + e.getStartTime().getNano(); + + if etype == 'jdk.GarbageCollection': + duration = getNanoValue(e, 'duration') + name = getEventValue(e, 'name') + value = getNanoValue(e, 'sumOfPauses') + elif etype == 'jdk.GCPhasePause' or etype == 'jdk.GCPhaseConcurrent': + duration = getNanoValue(e, 'duration') + name = getEventValue(e, 'name') + value = duration + elif etype == 'jdk.GCCPUTime': + duration = getNanoValue(e, 'realTime') + name = "cpuTime" + value = getNanoValue(e, 'systemTime') + getNanoValue(e, 'userTime') + else: + continue + + jfr_rows.append([etype, start, duration, name, value]) + + # Only create a table if we saw any GC events + if len(jfr_rows) > 0: + jfr_gc = new_table([ + string_col("origin", ["deephaven-engine" for r in jfr_rows]), + string_col("type", [r[0] for r in jfr_rows]), + long_col("start_ns", [r[1] for r in jfr_rows]), + long_col("duration_ns", [r[2] for r in jfr_rows]), + string_col("name", [r[3] for r in jfr_rows]), + double_col("value", [r[4] for r in jfr_rows]), + ]) + standard_events = merge([standard_events, jfr_gc]) """; static final String ugpQuery = """ diff --git a/src/main/java/io/deephaven/benchmark/api/BenchEvents.java b/src/main/java/io/deephaven/benchmark/api/BenchEvents.java index f3d715f0..1ecef26d 100644 --- a/src/main/java/io/deephaven/benchmark/api/BenchEvents.java +++ b/src/main/java/io/deephaven/benchmark/api/BenchEvents.java @@ -12,7 +12,7 @@ * Represents the events gathered during usage of the Bench API. These can include events gather by the API or the user. 
*/ final public class BenchEvents { - static final String header = "benchmark_name,origin,type,start,duration,detail"; + static final String header = "benchmark_name,origin,type,start,duration,name,value"; final List events = new ArrayList<>(); final Path file; private String name = null; @@ -38,8 +38,9 @@ public BenchEvents add(ResultTable table) { var type = table.getValue(r, "type").toString(); var startNanos = table.getNumber(r, "start_ns").longValue(); var durationNanos = table.getNumber(r, "duration_ns").longValue(); - var details = table.getValue(r, "detail").toString(); - var event = new Event(origin, type, startNanos, durationNanos, details); + var name = String.valueOf(table.getValue(r, "name")); + var value = table.getNumber(r, "value").doubleValue(); + var event = new Event(origin, type, startNanos, durationNanos, name, value); events.add(event); } return this; @@ -75,10 +76,10 @@ static void writeLine(String line, Path file) { } } - record Event(String origin, String type, long startNanos, long durationNanos, String detail) { + record Event(String origin, String type, long startNanos, long durationNanos, String name, double value) { String toCsv() { - return origin + "," + type + "," + startNanos + "," + durationNanos + "," + detail; + return origin + "," + type + "," + startNanos + "," + durationNanos + "," + name + "," + value; } } -} \ No newline at end of file +} diff --git a/src/main/java/io/deephaven/benchmark/api/Snippets.java b/src/main/java/io/deephaven/benchmark/api/Snippets.java index 86bded74..58790bb8 100644 --- a/src/main/java/io/deephaven/benchmark/api/Snippets.java +++ b/src/main/java/io/deephaven/benchmark/api/Snippets.java @@ -175,7 +175,8 @@ def bench_api_metrics_add(category, name, value, note=''): * ex. bench_api_metrics_table = bench_api_metrics_collect() */ static String bench_api_metrics_collect = """ - from deephaven import input_table, empty_table, dtypes as dht + from deephaven import input_table, empty_table, new_table, dtypes as dht + from deephaven.column import string_col, long_col, double_col def bench_api_metrics_collect(): s = dht.string t = input_table({'timestamp':s,'origin':s,'category':s,'name':s,'value':s,'note':s}) @@ -185,8 +186,15 @@ def bench_api_metrics_collect(): t.add(m1) return t - standard_events = new_table([ string_col("origin",[]), string_col("type",[]), long_col("start_ns",[]), - long_col("duration_ns",[]), string_col("detail",[])]) + # Standard events table used by JFR workflows + standard_events = new_table([ + string_col("origin", []), + string_col("type", []), + long_col("start_ns", []), + long_col("duration_ns", []), + string_col("name", []), + double_col("value", []), + ]) """; /** @@ -261,4 +269,4 @@ static String getFunc(String functionName, String functionDef, String query, Str return functionDef + System.lineSeparator(); } -} \ No newline at end of file +} diff --git a/src/main/java/io/deephaven/benchmark/connect/BarrageConnector.java b/src/main/java/io/deephaven/benchmark/connect/BarrageConnector.java index 632811ca..85cc3c3f 100644 --- a/src/main/java/io/deephaven/benchmark/connect/BarrageConnector.java +++ b/src/main/java/io/deephaven/benchmark/connect/BarrageConnector.java @@ -35,7 +35,8 @@ class BarrageConnector implements Connector { static { System.setProperty("thread.initialization", ""); // Remove server side initializers (e.g. 
DebuggingInitializer) } - static final int maxFetchCount = 1000; + static final int maxFetchCount = 100000; + static final int inboundMessageMB = 64; final private BarrageSession session; final private ConsoleSession console; final private ManagedChannel channel; @@ -243,6 +244,9 @@ private ManagedChannel getManagedChannel(String host, int port) { final ManagedChannelBuilder channelBuilder = ManagedChannelBuilder.forAddress(host, port); channelBuilder.usePlaintext(); // channelBuilder.useTransportSecurity(); If eventually security is needed + // Increase the maximum inbound message size so large Barrage snapshots (e.g. standard_events) + // do not trip the default 4 MiB gRPC limit while prototyping benchmarks. + channelBuilder.maxInboundMessageSize(inboundMessageMB * 1024 * 1024); // 32 MiB return channelBuilder.build(); } From 528c36560cde30e5616194515707c59278cbd397 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Thu, 9 Apr 2026 11:21:48 -0600 Subject: [PATCH 12/25] Added UGP events --- .../tests/train/TrainTestRunner.java | 65 ++++++++++++++----- .../io/deephaven/benchmark/api/Snippets.java | 3 +- .../dashboards/benchmark_functions.dh.py | 36 ++++++---- 3 files changed, 73 insertions(+), 31 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index d56349a4..ad0d7e23 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -39,11 +39,12 @@ public void addSetupQuery(String query) { public void test(String name, long maxExpectedRowCount, String operation, String... loadColumns) { delegate.addSetupQuery(startJfrQuery); + delegate.addSetupQuery(startUgpQuery); + delegate.addTeardownQuery(stopUgpQuery); delegate.addTeardownQuery(stopJfrQuery); - delegate.addTeardownQuery(ugpQuery); delegate.test(name, maxExpectedRowCount, operation, loadColumns); } - + static final String startJfrQuery = """ import jpy Recording = jpy.get_type("jdk.jfr.Recording") @@ -54,20 +55,20 @@ public void test(String name, long maxExpectedRowCount, String operation, String for n in enabled_events: try: rec.enable(n) - except Exception as e: - print(f"Event Not Enabled: {e}") + except Exception: + print(f"Event Not Enabled: {n}") disabled_events=['jdk.ExecutionSample', 'jdk.JavaMonitorEnter', 'jdk.JavaMonitorWait', 'jdk.ThreadSleep', 'jdk.SocketRead', 'jdk.SocketWrite'] for n in disabled_events: try: - rec.disable(_ename) + rec.disable(n) except Exception: - print(f"Event Not Disabled: {e}") + print(f"Event Not Disabled: {n}") rec.start() """; - + static final String stopJfrQuery = """ Paths = jpy.get_type("java.nio.file.Paths") RecordingFile = jpy.get_type("jdk.jfr.consumer.RecordingFile") @@ -77,7 +78,6 @@ public void test(String name, long maxExpectedRowCount, String operation, String rec.close() events = RecordingFile.readAllEvents(Paths.get("/data/benchmark.jfr")) - jfr_rows = [] def getEventValue(ev, field): @@ -99,7 +99,6 @@ def getNanoValue(ev, duration_field): if hasattr(val, "toNanos"): return val.toNanos() raise TypeError(f"Unsupported JFR value type: {type(val)}") - for i in range(events.size()): e = events.get(i) etype = e.getEventType().getName() @@ -122,7 +121,6 @@ raise TypeError(f"Unsupported JFR value type: {type(val)}") jfr_rows.append([etype, start, duration, name, value]) - # Only create a table if we saw any GC events if len(jfr_rows) > 0: jfr_gc = new_table([ string_col("origin", 
["deephaven-engine" for r in jfr_rows]), @@ -134,11 +132,44 @@ raise TypeError(f"Unsupported JFR value type: {type(val)}") ]) standard_events = merge([standard_events, jfr_gc]) """; - - static final String ugpQuery = """ - from deephaven import write_csv - import deephaven.perfmon as pm - ugp = pm.update_performance_log() - write_csv(ugp, "/data/ugp_cycles.csv") + + static final String startUgpQuery = """ + from deephaven import time_table + from deephaven.table_listener import listen + import time + + if 'train_ugp_listener' in globals(): train_ugp_listener.stop() + train_wall_epoch_ns = time.time_ns() + train_ugp_times = [time.perf_counter_ns()] + train_time_table = time_table("PT0.001S").tail(1) + + def train_ugp_update(update, is_replay): + train_ugp_times.append(time.perf_counter_ns()) + + train_ugp_listener = listen(train_time_table, train_ugp_update) + """; + + static final String stopUgpQuery = """ + if 'train_ugp_listener' in globals(): train_ugp_listener.stop() + if len(train_ugp_times) > 1: + mono_start = train_ugp_times[0] + ugp_rows = [] + for i in range(1, len(train_ugp_times)): + mono_prev = train_ugp_times[i - 1] + mono_curr = train_ugp_times[i] + delta_ns = mono_curr - mono_prev + wall_clock_ns = train_wall_epoch_ns + (mono_curr - mono_start) + ugp_rows.append([wall_clock_ns, delta_ns, mono_curr]) + + ugp_events = new_table([ + string_col("origin", ["deephaven-engine"] * len(ugp_rows)), + string_col("type", ["ugp.delta"] * len(ugp_rows)), + long_col("start_ns", [r[0] for r in ugp_rows]), + long_col("duration_ns", [r[1] for r in ugp_rows]), + string_col("name", ["elapsedTime"] * len(ugp_rows)), + double_col("value", [float(r[2]) for r in ugp_rows]), + ]) + + standard_events = merge([standard_events, ugp_events]) """; -} \ No newline at end of file +} diff --git a/src/main/java/io/deephaven/benchmark/api/Snippets.java b/src/main/java/io/deephaven/benchmark/api/Snippets.java index 58790bb8..fc659346 100644 --- a/src/main/java/io/deephaven/benchmark/api/Snippets.java +++ b/src/main/java/io/deephaven/benchmark/api/Snippets.java @@ -185,8 +185,7 @@ def bench_api_metrics_collect(): 'name=``+m[3]','value=``+m[4]','note=``+m[5]']) t.add(m1) return t - - # Standard events table used by JFR workflows + standard_events = new_table([ string_col("origin", []), string_col("type", []), diff --git a/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/benchmark_functions.dh.py b/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/benchmark_functions.dh.py index be362158..071e2f8f 100644 --- a/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/benchmark_functions.dh.py +++ b/src/main/resources/io/deephaven/benchmark/run/profile/queries/dashboards/benchmark_functions.dh.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2024 Deephaven Data Labs and Patent Pending +# Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending # # Deephaven python functions to support Benchmark Dashboards. These functions produce basic tables, # format strings, and do calculations. 
The data for creating tables is downloaded and cached from @@ -6,11 +6,9 @@ # # Requirements: Deephaven 0.36.1 or greater -import os, re, glob, jpy -import deephaven.dtypes as dht +import os, re, jpy from deephaven import read_csv, merge, agg, empty_table, input_table, dtypes as dht from urllib.request import urlopen, urlretrieve -from numpy import typing as npt # Convert the given name to a name suitable for a DH column name def normalize_name(name): @@ -123,8 +121,11 @@ def convert_result(table): # Do any conversions of type or column name needed from benchmark-metrics.csv def convert_metric(table): - return table.view(['benchmark_name','origin','timestamp=(long)timestamp','name', - 'value=(double)value','note']) + return table.view(['benchmark_name','origin','timestamp=(long)timestamp','name','value=(double)value','note']) + +# Do any conversions of type or column name needed from benchmark-events.csv +def convert_event(table): + return table.view(['benchmark_name','origin','start','duration','name','value=(double)value']) # Do any conversions of type or column name needed from benchmark-platform.csv def convert_platform(table): @@ -171,6 +172,11 @@ def load_bench_results(storage_uri, category='adhoc', actor_filter=None, set_fil def load_bench_metrics(storage_uri, category='adhoc', actor_filter=None, set_filter=None): run_ids = get_run_paths(storage_uri, category, actor_filter, set_filter, 100) return merge_run_tables(storage_uri, run_ids, category, 'benchmark-metrics.csv', convert_metric) + +# Load all benchmark-events.csv data collected from the given storage, category, and filters +def load_bench_events(storage_uri, category='adhoc', actor_filter=None, set_filter=None): + run_ids = get_run_paths(storage_uri, category, actor_filter, set_filter, 100) + return merge_run_tables(storage_uri, run_ids, category, 'benchmark-events.csv', convert_event) # Load all benchmark-platform.csv data collected from the given storage, category, and filters def load_bench_platform(storage_uri, category='adhoc', actor_filter=None, set_filter=None): @@ -201,7 +207,9 @@ def load_table_or_empty(table_name, storage_uri, category='adhoc', actor_filter= return globals()[f'empty_bench_{table_name}']() # Add columns for the specified platform properties -def add_platform_values(table, pnames=[], cnames = []): +def add_platform_values(table, pnames=None, cnames=None): + pnames = pnames if pnames is not None else [] + cnames = cnames if cnames is not None else [] pnames = list(dict.fromkeys(pnames)) for pname in pnames: new_pname = normalize_name(pname) @@ -213,14 +221,16 @@ def add_platform_values(table, pnames=[], cnames = []): return table # Add columns for the specified metric properties -def add_metric_values(table, pnames=[], cnames=[]): +def add_metric_values(table, pnames=None, cnames=None): + pnames = pnames if pnames is not None else [] + cnames = cnames if cnames is not None else [] pnames = list(dict.fromkeys(pnames)) for pname in pnames: new_pname = normalize_name(pname) cnames.append(new_pname) - single_metrtics = bench_metrics.where(['name=pname']).first_by(['benchmark_name','set_id','run_id','origin']) + single_metrics = bench_metrics.where(['name=pname']).first_by(['benchmark_name','set_id','run_id','origin']) table = table.natural_join( - single_metrtics, on=['benchmark_name','set_id','run_id','origin'], joins=[new_pname+'=value'] + single_metrics, on=['benchmark_name','set_id','run_id','origin'], joins=[new_pname+'=value'] ) return table @@ -239,12 +249,14 @@ def 
format_columns(table,pct_cols=(),int_cols=()): # Get a percentage standard deviation for the given list of rates def rstd(rates) -> float: rates = [i for i in rates if i >= 0] + if not rates: return 0.0 mean = statistics.mean(rates) return (statistics.pstdev(rates) / mean) if mean != 0 else 0.0 # Get the zscore of one rate against a list of rates def zscore(rate, rates) -> float: rates = [i for i in rates if i >= 0] + if not rates: return 0.0 std = statistics.pstdev(rates) return ((rate - statistics.mean(rates)) / std) if std != 0 else 0.0 @@ -260,11 +272,11 @@ def rchange(rates) -> float: rates = array('l', rates) if(len(rates) < 2): return 0.0 m = statistics.mean(rates[:-1]) - return (rates[-1] - m) / m + return ((rates[-1] - m) / m) if m != 0 else 0.0 # Get the percentage gain between two values def gain(start, end) -> float: - return (end - start) / start + return ((end - start) / start) if start != 0 else 0.0 # Format a list of rates to make them easier to read in a DHC table def format_rates(rates): From bd5ff0233e3e9ba493158d2a31604a4cac3a9182 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Thu, 9 Apr 2026 19:45:43 -0600 Subject: [PATCH 13/25] Rescaled only static trained for 120 secs --- .../tests/standard/StandardTestRunner.java | 8 +++---- .../benchmark/tests/train/AggByTrainTest.java | 7 +++---- .../tests/train/FilterTrainTest.java | 12 +++++------ .../tests/train/FormulaTrainTest.java | 6 +++--- .../tests/train/NaturalJoinTrainTest.java | 4 ++-- .../tests/train/OrderedTrainTest.java | 4 ++-- .../tests/train/TrainTestRunner.java | 13 ++++++------ .../tests/train/UpdateByTrainTest.java | 4 ++-- .../io/deephaven/benchmark/api/Snippets.java | 21 ++++++++++--------- 9 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index 9d4b23ae..77fa4f59 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -36,7 +36,7 @@ final public class StandardTestRunner { private int staticFactor = 1; private int incFactor = 1; private int rowCountFactor = 1; - private boolean useMemorySource = true; + private boolean useCachedSource = true; private boolean useLocalParquet = false; public StandardTestRunner(Object testInst) { @@ -107,8 +107,8 @@ public void setServices(String... services) { * * @return true if in memory source, otherwise false */ - public void useMemorySource(boolean useMemorySource) { - this.useMemorySource = useMemorySource; + public void useCachedSource(boolean useMemorySource) { + this.useCachedSource = useMemorySource; } /** @@ -242,7 +242,7 @@ long getMaxExpectedRowCount(long expectedRowCount, long scaleFactor) { String getReadOperation(int scaleFactor, long rowCount, String... loadColumns) { var headRows = (rowCount >= getGeneratedRowCount()) ? "" : ".head(${rows})"; - var selectStr = useMemorySource ? "select" : "view"; + var selectStr = useCachedSource ? 
"select" : "view"; if (scaleFactor > 1 && mainTable.equals("timed") && Arrays.asList(loadColumns).contains("timestamp")) { var read = """ merge([ diff --git a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java index 22162021..fd6ba528 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java @@ -25,18 +25,17 @@ void setup(double rowFactor) { } @Test - @Disabled void aggBy0Groups() { - setup(120); + setup(572); var q = "timed.agg_by(aggs)"; runner.test("AggBy- No Groups", 1, q, "num1", "num2"); } @Test void aggBy2Groups() { - setup(21); + setup(66); var q = "timed.agg_by(aggs, by=['key1', 'key2'])"; - runner.test("AggBy- 2 Groups 10K Unique Combos ", 10100, q, "key1", "key2", "num1", "num2"); + runner.test("AggBy- 2 Groups 10K Unique Combos", 10100, q, "key1", "key2", "num1", "num2"); } } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java index a3396ee4..32d91289 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java @@ -25,18 +25,18 @@ void setup(double rowFactor) { } @Test - void filter1Col() { - setup(390); - var q = "timed.where_in(where_filter, cols=['key1 = set1']).where(['key1 < `4`'])"; - runner.test("Filter- 1 Col", 0, q, "key1", "num1"); + void filter2Cols() { + setup(815); + var q = "timed.where_in(where_filter, cols=['key1 = set1']).where(['inRange(num1, 0, 100)'])"; + runner.test("Filter- 2 Cols", 0, q, "key1", "key2", "num1"); } @Test void filter3Cols() { - setup(390); + setup(336); var q = """ timed.where_in(where_filter, cols=['key1 = set1', 'key2 = set2', 'key3 = set3']) \ - .where(filters=["key1 = '1'", "key2 < '100'", "key3 in -2, -1, 0, 1, 2"]) + .where(filters=["key1 = '1'", 'inRange(num1, 0, 100)', 'key3 in -2, -1, 0, 1, 2']) """; runner.test("Filter- 3 Cols", 0, q, "key1", "key2", "key3", "num1"); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java index 6b0f11cb..e88ad340 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java @@ -16,7 +16,7 @@ void setup(double rowFactor) { @Test void formulaUdf() { - setup(4.8); + setup(9); var setup = """ def f_py(num1: float, num2: float) -> float: return (num2 + num1) / 2 @@ -30,14 +30,14 @@ def f_np(num1: np.float64, num2: np.float64) -> np.float64: @Test void formulaInline() { - setup(220); + setup(467); var q = "timed.view(['New1 = (float)((num2 + num1) / 2)', 'New2 = (float)(num1 + num2)']).sum_by()"; runner.test("Formula- Inline 2 Calcs", 1, q, "num1", "num2"); } @Test void formulaDate() { - setup(1.8); + setup(2.7); var q = """ timed.view([ 'New1 = parseDuration(`PT4H52M14S`).toHours()', diff --git a/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java index 34286996..3fc5a5c7 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java @@ -16,7 +16,7 @@ void setup(double rowFactor) { @Test void naturalJoinOn1Col() { - setup(60); + setup(300); var r = 
"right = right.select_distinct(['r_wild'])"; runner.addSetupQuery(r); var q = "timed.natural_join(right, on=['key1 = r_wild'])"; @@ -25,7 +25,7 @@ void naturalJoinOn1Col() { @Test void naturalJoinOn3Cols() { - setup(20); + setup(100); var q = "timed.natural_join(right, on=['key1 = r_wild', 'key2 = r_key2', 'key1 = r_key1'])"; runner.test("NaturalJoin- Join On 3 Cols", 0, q, "key1", "key2", "num1"); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java index 0a6978fe..c1cb6d74 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java @@ -27,14 +27,14 @@ void setup(double rowFactor) { @Test void ordered0Groups() { - setup(21); + setup(145); var q = "timed.agg_by(aggs)"; runner.test("Ordered- No Groups", 100, q, "key3", "key4", "num1", "num2"); } @Test void ordered2Groups() { - setup(5); + setup(22); var q = "timed.agg_by(aggs, by=['key1', 'key2'])"; runner.test("Ordered- 2 Groups 10K Unique Combos", 10100, q, "key1", "key2", "key3", "key4", "num1", "num2"); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index ad0d7e23..c523fb74 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -13,16 +13,17 @@ * versions and GC types. */ final public class TrainTestRunner { - static final int maxRowFactor = 400; + static final int maxRowFactor = 1000; final StandardTestRunner delegate; final long baseRowCount; TrainTestRunner(Object testInst) { this.delegate = new StandardTestRunner(testInst); this.baseRowCount = delegate.getGeneratedRowCount(); - delegate.useMemorySource(false); + delegate.useCachedSource(false); delegate.useLocalParquet(true); delegate.setRowFactor(maxRowFactor); + delegate.setScaleFactors(1, 0); // TODO: This is temporary for just-statics tests } public void tables(double rowFactor, String... names) { @@ -38,10 +39,10 @@ public void addSetupQuery(String query) { } public void test(String name, long maxExpectedRowCount, String operation, String... 
loadColumns) { - delegate.addSetupQuery(startJfrQuery); - delegate.addSetupQuery(startUgpQuery); - delegate.addTeardownQuery(stopUgpQuery); - delegate.addTeardownQuery(stopJfrQuery); +// delegate.addSetupQuery(startJfrQuery); +// delegate.addSetupQuery(startUgpQuery); +// delegate.addTeardownQuery(stopUgpQuery); +// delegate.addTeardownQuery(stopJfrQuery); delegate.test(name, maxExpectedRowCount, operation, loadColumns); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java index 9ed28657..956fb72c 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java @@ -34,7 +34,7 @@ void setup(double rowFactor) { @Test void mixedComboNoGroups() { - setup(9.8); + setup(21.8); runner.addSetupQuery(noGroups); var q = "timed.update_by(ops=[avg_contains, max_before, prod_after, ema_tick_op, min_op, sum_op])"; runner.test("UpdateBy- No Groups 12 Cols", 0, q, "num1", "num2", "timestamp"); @@ -42,7 +42,7 @@ void mixedComboNoGroups() { @Test void rollingCombo2Groups() { - setup(2.8); + setup(5.8); runner.addSetupQuery(group10K); var q = """ timed.update_by(ops=[avg_contains,max_before,prod_after,ema_tick_op,min_op,sum_op], by=['key1','key2']) diff --git a/src/main/java/io/deephaven/benchmark/api/Snippets.java b/src/main/java/io/deephaven/benchmark/api/Snippets.java index fc659346..b5f52bfc 100644 --- a/src/main/java/io/deephaven/benchmark/api/Snippets.java +++ b/src/main/java/io/deephaven/benchmark/api/Snippets.java @@ -80,9 +80,19 @@ with exclusive_lock(table): * ex. bench_api_metrics_init() */ static String bench_api_metrics_init = """ + from deephaven import new_table + from deephaven.column import string_col, long_col, double_col def bench_api_metrics_init(): - global bench_api_metrics + global bench_api_metrics, standard_events bench_api_metrics = [] + standard_events = new_table([ + string_col("origin", []), + string_col("type", []), + long_col("start_ns", []), + long_col("duration_ns", []), + string_col("name", []), + double_col("value", []), + ]) """; /** @@ -185,15 +195,6 @@ def bench_api_metrics_collect(): 'name=``+m[3]','value=``+m[4]','note=``+m[5]']) t.add(m1) return t - - standard_events = new_table([ - string_col("origin", []), - string_col("type", []), - long_col("start_ns", []), - long_col("duration_ns", []), - string_col("name", []), - double_col("value", []), - ]) """; /** From 75449bb7fb200ec4285d55a9272f44646f1bf24d Mon Sep 17 00:00:00 2001 From: stanbrub Date: Thu, 9 Apr 2026 20:11:43 -0600 Subject: [PATCH 14/25] Updated adhoc for local parquet env variables --- .github/resources/adhoc-benchmark-docker-compose.yml | 1 + .github/scripts/manage-deephaven-remote.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/resources/adhoc-benchmark-docker-compose.yml b/.github/resources/adhoc-benchmark-docker-compose.yml index c02c30e3..78a352f1 100644 --- a/.github/resources/adhoc-benchmark-docker-compose.yml +++ b/.github/resources/adhoc-benchmark-docker-compose.yml @@ -8,6 +8,7 @@ services: - ./minio:/minio environment: - "START_OPTS=-DAuthHandlers=io.deephaven.auth.AnonymousAuthenticationHandler ${CONFIG_OPTS}" + - "DEEPHAVEN_HOST_OS_DIR=${ENV_DEEPHAVEN_HOST_OS_DIR}" redpanda: command: diff --git a/.github/scripts/manage-deephaven-remote.sh b/.github/scripts/manage-deephaven-remote.sh index e1e49cf8..57af19cb 100755 --- a/.github/scripts/manage-deephaven-remote.sh +++ 
b/.github/scripts/manage-deephaven-remote.sh @@ -35,6 +35,7 @@ if [[ ${CONFIG_OPTS} == "" ]]; then CONFIG_OPTS="-Xmx24g" fi echo "CONFIG_OPTS=${CONFIG_OPTS}" > .env +echo "ENV_DEEPHAVEN_HOST_OS_DIR=${DEEPHAVEN_DIR}" >> .env IS_BRANCH="false" if [[ ${DOCKER_IMG} == *"@sha"*":"* ]]; then From ec2d95e5d7092ba95e9d15a691427de51d19fae7 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Thu, 9 Apr 2026 20:24:41 -0600 Subject: [PATCH 15/25] Open up dh data dir so local parquet can work --- .github/scripts/setup-test-server-remote.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/setup-test-server-remote.sh b/.github/scripts/setup-test-server-remote.sh index 43033300..920e6c4d 100755 --- a/.github/scripts/setup-test-server-remote.sh +++ b/.github/scripts/setup-test-server-remote.sh @@ -135,7 +135,8 @@ sudo docker system prune --volumes --force sudo rm -rf ${DEEPHAVEN_DIR} title "-- Staging Docker Resources --" -mkdir -p ${DEEPHAVEN_DIR} +mkdir -p ${DEEPHAVEN_DIR}/data +chmod 777 ${DEEPHAVEN_DIR}/data cd ${DEEPHAVEN_DIR} cp ${GIT_DIR}/benchmark/.github/resources/${RUN_TYPE}-benchmark-docker-compose.yml docker-compose.yml From a402a54a524ebd07589f8d2d22690abc5ca76dcd Mon Sep 17 00:00:00 2001 From: stanbrub Date: Fri, 10 Apr 2026 11:50:20 -0600 Subject: [PATCH 16/25] More logging for benchmark runs --- .../deephaven/benchmark/tests/standard/StandardTestRunner.java | 2 ++ src/main/java/io/deephaven/benchmark/api/BenchTable.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index 77fa4f59..d4be6b03 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -10,6 +10,7 @@ import io.deephaven.benchmark.controller.Controller; import io.deephaven.benchmark.controller.DeephavenDockerController; import io.deephaven.benchmark.metric.Metrics; +import io.deephaven.benchmark.util.Log; import io.deephaven.benchmark.util.Timer; /** @@ -374,6 +375,7 @@ Result runTest(String name, String warmupQuery, String mainQuery) { stopUnusedServices(requiredServices); try { + Log.info("Running Test: %s", name); if (getWarmupRowCount() > 0) api.query(warmupQuery).execute(); var result = new AtomicReference(); diff --git a/src/main/java/io/deephaven/benchmark/api/BenchTable.java b/src/main/java/io/deephaven/benchmark/api/BenchTable.java index 11ccd10e..1c302cac 100644 --- a/src/main/java/io/deephaven/benchmark/api/BenchTable.java +++ b/src/main/java/io/deephaven/benchmark/api/BenchTable.java @@ -188,7 +188,7 @@ public boolean generateParquet() { }).execute(); if (usedExistingParquet.get()) { - Log.info("Using existing table '%s' with %s rows", tableName, getRowCount()); + Log.info("\nUsing existing table '%s' with %s rows", tableName, getRowCount()); return false; } Log.info("Generating table '%s' with %s rows", tableName, getRowCount()); From 4cf83575928f59de9eef0fb9f355acf8f8007c1f Mon Sep 17 00:00:00 2001 From: stanbrub Date: Fri, 10 Apr 2026 13:28:44 -0600 Subject: [PATCH 17/25] Scaling back AggBy because of system lockup --- .../io/deephaven/benchmark/tests/train/AggByTrainTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java index fd6ba528..85ed5615 
100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java @@ -17,8 +17,8 @@ void setup(double rowFactor) { from deephaven import agg aggs = [ - agg.sum_('Sum=num1'), agg.std('Std=num2'), agg.min_('Min=num1'), agg.max_('Max=num2'), - agg.avg('Avg=num1'), agg.var('Var=num2'), agg.count_('num1') + agg.sum_('Sum=num1'), agg.std('Std=num2'), agg.min_('Min=num1'), agg.max_('Max=num2'), + agg.avg('Avg=num1'), agg.var('Var=num2'), agg.count_('num1') ] """; runner.addSetupQuery(setupStr); @@ -26,7 +26,7 @@ void setup(double rowFactor) { @Test void aggBy0Groups() { - setup(572); + setup(400); var q = "timed.agg_by(aggs)"; runner.test("AggBy- No Groups", 1, q, "num1", "num2"); } From 85077947dbfd69167aff0e18885ac18864c370d2 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Fri, 10 Apr 2026 15:19:16 -0600 Subject: [PATCH 18/25] Restrict the number of parquet threads and memory for the runner --- .github/scripts/run-benchmarks-remote.sh | 2 +- .../java/io/deephaven/benchmark/tests/train/AggByTrainTest.java | 2 +- src/main/java/io/deephaven/benchmark/api/BenchTable.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/scripts/run-benchmarks-remote.sh b/.github/scripts/run-benchmarks-remote.sh index b28a25c1..259915e6 100755 --- a/.github/scripts/run-benchmarks-remote.sh +++ b/.github/scripts/run-benchmarks-remote.sh @@ -43,7 +43,7 @@ title "-- Running Benchmarks --" set +f cd ${RUN_DIR} cat ${RUN_TYPE}-scale-benchmark.properties | sed 's|${baseRowCount}|'"${ROW_COUNT}|g" | sed 's|${baseDistrib}|'"${DISTRIB}|g" | sed 's|${userHome}|'"${HOME}|g" > scale-benchmark.properties -JAVA_OPTS=$(echo -Dbenchmark.profile=scale-benchmark.properties -jar deephaven-benchmark-*-standalone.jar -cp standard-tests.jar) +JAVA_OPTS=$(echo -Xmx4g -Dbenchmark.profile=scale-benchmark.properties -jar deephaven-benchmark-*-standalone.jar -cp standard-tests.jar) set -f if [ "${TAG_NAME}" = "Any" ]; then diff --git a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java index 85ed5615..7d876359 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java @@ -26,7 +26,7 @@ void setup(double rowFactor) { @Test void aggBy0Groups() { - setup(400); + setup(572); var q = "timed.agg_by(aggs)"; runner.test("AggBy- No Groups", 1, q, "num1", "num2"); } diff --git a/src/main/java/io/deephaven/benchmark/api/BenchTable.java b/src/main/java/io/deephaven/benchmark/api/BenchTable.java index 1c302cac..50198c42 100644 --- a/src/main/java/io/deephaven/benchmark/api/BenchTable.java +++ b/src/main/java/io/deephaven/benchmark/api/BenchTable.java @@ -246,7 +246,7 @@ public boolean generateLocalParquet() { throw new RuntimeException("DEEPHAVEN_HOST_OS_DIR env must be set to use local parquet generation"); var parquetPath = (dhHostOsDir.get() + "/" + tableGenParquet.get()).replace(".parquet", ".dataset"); - var threadCount = Runtime.getRuntime().availableProcessors(); + var threadCount = 8; var rowsPerThread = getRowCount() / threadCount; var futures = new ArrayList>(threadCount); for (int i = 0; i < threadCount; i++) { From c0b5e7a5861af559feab5b615aa46cd6527dd6c8 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Fri, 10 Apr 2026 21:59:03 -0600 Subject: [PATCH 19/25] Fixed NaturalJoin OOM --- .../deephaven/benchmark/tests/train/NaturalJoinTrainTest.java | 2 +- 
.../io/deephaven/benchmark/tests/train/TrainTestRunner.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java index 3fc5a5c7..6837bcc8 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java @@ -16,7 +16,7 @@ void setup(double rowFactor) { @Test void naturalJoinOn1Col() { - setup(300); + setup(230); var r = "right = right.select_distinct(['r_wild'])"; runner.addSetupQuery(r); var q = "timed.natural_join(right, on=['key1 = r_wild'])"; diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index c523fb74..555d0487 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -13,7 +13,7 @@ * versions and GC types. */ final public class TrainTestRunner { - static final int maxRowFactor = 1000; + static final int maxRowFactor = 850; final StandardTestRunner delegate; final long baseRowCount; From 8f1a77f1c741852467b015c69808a98935bf81a2 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Wed, 22 Apr 2026 10:58:42 -0600 Subject: [PATCH 20/25] Added separate scaling for static vs inc --- .../tests/standard/StandardTestRunner.java | 2 +- .../benchmark/tests/train/AggByTrainTest.java | 8 +-- .../tests/train/FilterTrainTest.java | 8 +-- .../tests/train/FormulaTrainTest.java | 10 +-- .../tests/train/NaturalJoinTrainTest.java | 8 +-- .../tests/train/OrderedTrainTest.java | 8 +-- .../tests/train/TrainTestRunner.java | 67 +++++++++++++------ .../tests/train/UpdateByTrainTest.java | 8 +-- 8 files changed, 71 insertions(+), 48 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index d4be6b03..31388404 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -355,7 +355,6 @@ String getIncQuery(String name, String operation, long rowCount, String... loadC String populateQuery(String name, String query, String operation, String read, String...
loadColumns) { query = query.replace("${readTable}", read); - query = query.replace("${mainTable}", mainTable); query = query.replace("${loadSupportTables}", loadSupportTables()); query = query.replace("${loadColumns}", listStr(loadColumns)); query = query.replace("${setupQueries}", String.join("\n", setupQueries)); @@ -364,6 +363,7 @@ String populateQuery(String name, String query, String operation, String read, S query = query.replace("${teardownQueries}", String.join("\n", teardownQueries)); query = query.replace("${logOperationBegin}", getLogSnippet("Begin", name)); query = query.replace("${logOperationEnd}", getLogSnippet("End", name)); + query = query.replace("${mainTable}", mainTable); return query; } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java index 7d876359..57e38894 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/AggByTrainTest.java @@ -10,8 +10,8 @@ public class AggByTrainTest { final TrainTestRunner runner = new TrainTestRunner(this); - void setup(double rowFactor) { - runner.tables(rowFactor, "timed"); + void setup(double staticRowFactor, double incRowFactor) { + runner.tables(staticRowFactor, incRowFactor, "timed"); var setupStr = """ from deephaven import agg @@ -26,14 +26,14 @@ void setup(double rowFactor) { @Test void aggBy0Groups() { - setup(572); + setup(572, 286); var q = "timed.agg_by(aggs)"; runner.test("AggBy- No Groups", 1, q, "num1", "num2"); } @Test void aggBy2Groups() { - setup(66); + setup(66, 38); var q = "timed.agg_by(aggs, by=['key1', 'key2'])"; runner.test("AggBy- 2 Groups 10K Unique Combos", 10100, q, "key1", "key2", "num1", "num2"); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java index 32d91289..9c106897 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/FilterTrainTest.java @@ -11,8 +11,8 @@ public class FilterTrainTest { final TrainTestRunner runner = new TrainTestRunner(this); - void setup(double rowFactor) { - runner.tables(rowFactor, "timed"); + void setup(double staticRowFactor, double incRowFactor) { + runner.tables(staticRowFactor, incRowFactor, "timed"); var setup = """ from deephaven.column import string_col, int_col where_filter = new_table([ @@ -26,14 +26,14 @@ void setup(double rowFactor) { @Test void filter2Cols() { - setup(815); + setup(815, 815); var q = "timed.where_in(where_filter, cols=['key1 = set1']).where(['inRange(num1, 0, 100)'])"; runner.test("Filter- 2 Cols", 0, q, "key1", "key2", "num1"); } @Test void filter3Cols() { - setup(336); + setup(400, 400); var q = """ timed.where_in(where_filter, cols=['key1 = set1', 'key2 = set2', 'key3 = set3']) \ .where(filters=["key1 = '1'", 'inRange(num1, 0, 100)', 'key3 in -2, -1, 0, 1, 2']) diff --git a/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java index e88ad340..d6976e45 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/FormulaTrainTest.java @@ -10,13 +10,13 @@ public class FormulaTrainTest { final TrainTestRunner runner = new TrainTestRunner(this); - void setup(double rowFactor) { - runner.tables(rowFactor, "timed"); + void setup(double 
staticRowFactor, double incRowFactor) { + runner.tables(staticRowFactor, incRowFactor, "timed"); } @Test void formulaUdf() { - setup(9); + setup(9, 9); var setup = """ def f_py(num1: float, num2: float) -> float: return (num2 + num1) / 2 @@ -30,14 +30,14 @@ def f_np(num1: np.float64, num2: np.float64) -> np.float64: @Test void formulaInline() { - setup(467); + setup(467, 467); var q = "timed.view(['New1 = (float)((num2 + num1) / 2)', 'New2 = (float)(num1 + num2)']).sum_by()"; runner.test("Formula- Inline 2 Calcs", 1, q, "num1", "num2"); } @Test void formulaDate() { - setup(2.7); + setup(3, 3); var q = """ timed.view([ 'New1 = parseDuration(`PT4H52M14S`).toHours()', diff --git a/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java index 6837bcc8..26ba8539 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/NaturalJoinTrainTest.java @@ -10,13 +10,13 @@ public class NaturalJoinTrainTest { final TrainTestRunner runner = new TrainTestRunner(this); - void setup(double rowFactor) { - runner.tables(rowFactor, "timed", "right"); + void setup(double staticRowFactor, double incRowFactor) { + runner.tables(staticRowFactor, incRowFactor, "timed", "right"); } @Test void naturalJoinOn1Col() { - setup(230); + setup(230, 120); var r = "right = right.select_distinct(['r_wild'])"; runner.addSetupQuery(r); var q = "timed.natural_join(right, on=['key1 = r_wild'])"; @@ -25,7 +25,7 @@ void naturalJoinOn1Col() { @Test void naturalJoinOn3Cols() { - setup(100); + setup(100, 20); var q = "timed.natural_join(right, on=['key1 = r_wild', 'key2 = r_key2', 'key1 = r_key1'])"; runner.test("NaturalJoin- Join On 3 Cols", 0, q, "key1", "key2", "num1"); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java index c1cb6d74..9f28a3f1 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java @@ -11,8 +11,8 @@ public class OrderedTrainTest { final TrainTestRunner runner = new TrainTestRunner(this); - void setup(double rowFactor) { - runner.tables(rowFactor, "timed"); + void setup(double staticRowFactor, double incRowFactor) { + runner.tables(staticRowFactor, incRowFactor, "timed"); var setupStr = """ from deephaven import agg @@ -27,14 +27,14 @@ void setup(double rowFactor) { @Test void ordered0Groups() { - setup(145); + setup(145, 18); var q = "timed.agg_by(aggs)"; runner.test("Ordered- No Groups", 100, q, "key3", "key4", "num1", "num2"); } @Test void ordered2Groups() { - setup(22); + setup(22, 4); var q = "timed.agg_by(aggs, by=['key1', 'key2'])"; runner.test("Ordered- 2 Groups 10K Unique Combos", 10100, q, "key1", "key2", "key3", "key4", "num1", "num2"); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index 555d0487..370fd2ca 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -1,6 +1,7 @@ /* Copyright (c) 2026-2026 Deephaven Data Labs and Patent Pending */ package io.deephaven.benchmark.tests.train; +import java.util.*; import io.deephaven.benchmark.tests.standard.StandardTestRunner; /** @@ -16,6 +17,11 @@ final public class TrainTestRunner { 
static final int maxRowFactor = 850; final StandardTestRunner delegate; final long baseRowCount; + final List setupQueries = new ArrayList<>(); + final List teardownQueries = new ArrayList<>(); + private String headQuery = null; + private double staticRowFactor = 1; + private double incRowFactor = 1; TrainTestRunner(Object testInst) { this.delegate = new StandardTestRunner(testInst); @@ -23,27 +29,45 @@ final public class TrainTestRunner { delegate.useCachedSource(false); delegate.useLocalParquet(true); delegate.setRowFactor(maxRowFactor); - delegate.setScaleFactors(1, 0); // TODO: This is temporary for just-statics tests } - public void tables(double rowFactor, String... names) { + public void tables(double staticRowFactor, double incRowFactor, String... names) { + if (Math.max(staticRowFactor, incRowFactor) > maxRowFactor) + throw new IllegalArgumentException("Row factors cannot be greater than " + maxRowFactor); + this.staticRowFactor = staticRowFactor; + this.incRowFactor = incRowFactor; delegate.tables(names); - if (rowFactor > maxRowFactor) - throw new IllegalArgumentException("Row factor cannot be greater than " + maxRowFactor); - var q = "%s = %s.head(%d)".formatted(names[0], names[0], (long) (baseRowCount * rowFactor)); - delegate.addSetupQuery(q); + headQuery = "%s = %s.head(${trainRowCount})".formatted(names[0], names[0]); } public void addSetupQuery(String query) { - delegate.addSetupQuery(query); + setupQueries.add(query); } public void test(String name, long maxExpectedRowCount, String operation, String... loadColumns) { -// delegate.addSetupQuery(startJfrQuery); -// delegate.addSetupQuery(startUgpQuery); -// delegate.addTeardownQuery(stopUgpQuery); -// delegate.addTeardownQuery(stopJfrQuery); - delegate.test(name, maxExpectedRowCount, operation, loadColumns); + // setupQueries(startJfrQuery); + //setupQueries.add(startUgpQuery); + //teardownQueries.add(stopUgpQuery); + // teardownQueries(stopJfrQuery); + if (staticRowFactor > 0) { + delegate.setScaleFactors(1, 0); // Turn on Static and off Inc + var h = headQuery.replace("${trainRowCount}", String.valueOf((long) (baseRowCount * staticRowFactor))); + delegate.addSetupQuery(h); + setupQueries.forEach(delegate::addSetupQuery); + teardownQueries.forEach(delegate::addTeardownQuery); + delegate.test(name, maxExpectedRowCount, operation, loadColumns); + } + if (incRowFactor > 0) { + delegate.setScaleFactors(0, 1); // Turn off Static and on Inc + var h = headQuery.replace("${trainRowCount}", String.valueOf((long) (baseRowCount * incRowFactor))); + delegate.addSetupQuery(h); + setupQueries.forEach(delegate::addSetupQuery); + teardownQueries.forEach(delegate::addTeardownQuery); + delegate.test(name, maxExpectedRowCount, operation, loadColumns); + } else { + throw new IllegalStateException("At least one of staticRowFactor or incRowFactor must be > 0"); + } + } static final String startJfrQuery = """ @@ -52,15 +76,18 @@ public void test(String name, long maxExpectedRowCount, String operation, String rec = Recording() rec.setName("benchmark") - enabled_events=['jdk.GarbageCollection', 'jdk.GCPhasePause', 'jdk.GCPhaseConcurrent', 'jdk.GCCPUTime'] + enabled_events=['jdk.ExecutionSample','jdk.NativeMethodSample','jdk.ThreadCPULoad','jdk.GarbageCollection', + 'jdk.GCPhasePause','jdk.SafepointBegin','jdk.SafepointEnd','jdk.SafepointState', + 'jdk.ObjectAllocationInNewTLAB','jdk.ObjectAllocationOutsideTLAB'] for n in enabled_events: try: rec.enable(n) except Exception: print(f"Event Not Enabled: {n}") - disabled_events=['jdk.ExecutionSample', 
'jdk.JavaMonitorEnter', 'jdk.JavaMonitorWait', 'jdk.ThreadSleep', - 'jdk.SocketRead', 'jdk.SocketWrite'] + disabled_events=['jdk.GCPhaseConcurrent','jdk.GCPhaseConcurrentMark','jdk.GCPhaseConcurrentEvacuation', + 'jdk.G1GarbageCollection','jdk.ShenandoahGarbageCollection','jdk.ZGarbageCollection','jdk.GCHeapSummary', + 'jdk.GCReferenceStatistics','jdk.GCWorkerData','jdk.GCCPUTime','jdk.GCPhasePause'] for n in disabled_events: try: rec.disable(n) @@ -107,16 +134,12 @@ raise TypeError(f"Unsupported JFR value type: {type(val)}") if etype == 'jdk.GarbageCollection': duration = getNanoValue(e, 'duration') - name = getEventValue(e, 'name') + name = 'sumOfPauses' value = getNanoValue(e, 'sumOfPauses') - elif etype == 'jdk.GCPhasePause' or etype == 'jdk.GCPhaseConcurrent': + elif etype == 'jdk.GCPhasePause': duration = getNanoValue(e, 'duration') name = getEventValue(e, 'name') value = duration - elif etype == 'jdk.GCCPUTime': - duration = getNanoValue(e, 'realTime') - name = "cpuTime" - value = getNanoValue(e, 'systemTime') + getNanoValue(e, 'userTime') else: continue @@ -145,7 +168,7 @@ raise TypeError(f"Unsupported JFR value type: {type(val)}") train_time_table = time_table("PT0.001S").tail(1) def train_ugp_update(update, is_replay): - train_ugp_times.append(time.perf_counter_ns()) + train_ugp_times.append((time.perf_counter_ns(), ${mainTable}.size)) train_ugp_listener = listen(train_time_table, train_ugp_update) """; diff --git a/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java index 956fb72c..cb9b679a 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/UpdateByTrainTest.java @@ -19,8 +19,8 @@ public class UpdateByTrainTest { prod_after = rolling_prod_time(ts_col='timestamp',cols=['E=num1','F=num2'],rev_time='-PT1M',fwd_time='PT4M') """; - void setup(double rowFactor) { - runner.tables(rowFactor, "timed"); + void setup(double staticRowFactor, double incRowFactor) { + runner.tables(staticRowFactor, incRowFactor, "timed"); var setup = """ from deephaven.updateby import rolling_avg_time, rolling_max_tick, rolling_prod_time from deephaven.updateby import ema_tick, cum_min, cum_sum @@ -34,7 +34,7 @@ void setup(double rowFactor) { @Test void mixedComboNoGroups() { - setup(21.8); + setup(21.8, 17); runner.addSetupQuery(noGroups); var q = "timed.update_by(ops=[avg_contains, max_before, prod_after, ema_tick_op, min_op, sum_op])"; runner.test("UpdateBy- No Groups 12 Cols", 0, q, "num1", "num2", "timestamp"); @@ -42,7 +42,7 @@ void mixedComboNoGroups() { @Test void rollingCombo2Groups() { - setup(5.8); + setup(5.8, 4.2); runner.addSetupQuery(group10K); var q = """ timed.update_by(ops=[avg_contains,max_before,prod_after,ema_tick_op,min_op,sum_op], by=['key1','key2']) From 29389924f64c2b7c6b6ca3c788188b05a9119197 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Wed, 22 Apr 2026 18:28:41 -0600 Subject: [PATCH 21/25] Better separation for running static and inc. 
Added ugp deltas --- .../tests/train/OrderedTrainTest.java | 2 +- .../tests/train/TrainTestRunner.java | 80 ++++++++++--------- 2 files changed, 45 insertions(+), 37 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java index 9f28a3f1..314de3c1 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/OrderedTrainTest.java @@ -34,7 +34,7 @@ void ordered0Groups() { @Test void ordered2Groups() { - setup(22, 4); + setup(22, 4.2); var q = "timed.agg_by(aggs, by=['key1', 'key2'])"; runner.test("Ordered- 2 Groups 10K Unique Combos", 10100, q, "key1", "key2", "key3", "key4", "num1", "num2"); } diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index 370fd2ca..caaae81b 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -15,20 +15,15 @@ */ final public class TrainTestRunner { static final int maxRowFactor = 850; - final StandardTestRunner delegate; - final long baseRowCount; + final Object testInst; final List setupQueries = new ArrayList<>(); final List teardownQueries = new ArrayList<>(); - private String headQuery = null; private double staticRowFactor = 1; private double incRowFactor = 1; + private String[] tableNames = null; TrainTestRunner(Object testInst) { - this.delegate = new StandardTestRunner(testInst); - this.baseRowCount = delegate.getGeneratedRowCount(); - delegate.useCachedSource(false); - delegate.useLocalParquet(true); - delegate.setRowFactor(maxRowFactor); + this.testInst = testInst; } public void tables(double staticRowFactor, double incRowFactor, String... names) { @@ -36,8 +31,7 @@ public void tables(double staticRowFactor, double incRowFactor, String... names) throw new IllegalArgumentException("Row factors cannot be greater than " + maxRowFactor); this.staticRowFactor = staticRowFactor; this.incRowFactor = incRowFactor; - delegate.tables(names); - headQuery = "%s = %s.head(${trainRowCount})".formatted(names[0], names[0]); + tableNames = names; } public void addSetupQuery(String query) { @@ -45,29 +39,40 @@ public void addSetupQuery(String query) { } public void test(String name, long maxExpectedRowCount, String operation, String... 
loadColumns) { + if (staticRowFactor <= 0 && incRowFactor <= 0) + throw new IllegalStateException("At least one of staticRowFactor or incRowFactor must be > 0"); + // setupQueries(startJfrQuery); - //setupQueries.add(startUgpQuery); - //teardownQueries.add(stopUgpQuery); + setupQueries.add(startUgpQuery); + teardownQueries.add(stopUgpQuery); // teardownQueries(stopJfrQuery); - if (staticRowFactor > 0) { - delegate.setScaleFactors(1, 0); // Turn on Static and off Inc - var h = headQuery.replace("${trainRowCount}", String.valueOf((long) (baseRowCount * staticRowFactor))); - delegate.addSetupQuery(h); - setupQueries.forEach(delegate::addSetupQuery); - teardownQueries.forEach(delegate::addTeardownQuery); - delegate.test(name, maxExpectedRowCount, operation, loadColumns); - } - if (incRowFactor > 0) { - delegate.setScaleFactors(0, 1); // Turn off Static and on Inc - var h = headQuery.replace("${trainRowCount}", String.valueOf((long) (baseRowCount * incRowFactor))); - delegate.addSetupQuery(h); - setupQueries.forEach(delegate::addSetupQuery); - teardownQueries.forEach(delegate::addTeardownQuery); - delegate.test(name, maxExpectedRowCount, operation, loadColumns); - } else { - throw new IllegalStateException("At least one of staticRowFactor or incRowFactor must be > 0"); - } + if (staticRowFactor > 0) + test(name, maxExpectedRowCount, operation, staticRowFactor, true, loadColumns); + + if (incRowFactor > 0) + test(name, maxExpectedRowCount, operation, incRowFactor, false, loadColumns); + } + + void test(String name, long maxExpectedRowCount, String operation, double rowFactor, boolean isStatic, + String... loadColumns) { + var delegate = new StandardTestRunner(testInst); + var baseRowCount = delegate.getGeneratedRowCount(); + delegate.useCachedSource(false); + delegate.useLocalParquet(true); + delegate.setRowFactor(maxRowFactor); + delegate.tables(tableNames); + delegate.setScaleFactors(isStatic ? 1 : 0, isStatic ? 
0 : 1); + + var headQuery = """ + ${mainTable} = ${mainTable}.head(${trainRowCount}) + loaded_tbl_size = ${mainTable}.size + """.replace("${trainRowCount}", String.valueOf((long) (baseRowCount * rowFactor))); + + delegate.addSetupQuery(headQuery); + setupQueries.forEach(delegate::addSetupQuery); + teardownQueries.forEach(delegate::addTeardownQuery); + delegate.test(name, maxExpectedRowCount, operation, loadColumns); } static final String startJfrQuery = """ @@ -164,7 +169,7 @@ raise TypeError(f"Unsupported JFR value type: {type(val)}") if 'train_ugp_listener' in globals(): train_ugp_listener.stop() train_wall_epoch_ns = time.time_ns() - train_ugp_times = [time.perf_counter_ns()] + train_ugp_times = [(time.perf_counter_ns(), 0)] train_time_table = time_table("PT0.001S").tail(1) def train_ugp_update(update, is_replay): @@ -176,21 +181,24 @@ def train_ugp_update(update, is_replay): static final String stopUgpQuery = """ if 'train_ugp_listener' in globals(): train_ugp_listener.stop() if len(train_ugp_times) > 1: - mono_start = train_ugp_times[0] + mono_start = train_ugp_times[0][0] ugp_rows = [] for i in range(1, len(train_ugp_times)): - mono_prev = train_ugp_times[i - 1] - mono_curr = train_ugp_times[i] + mono_prev = train_ugp_times[i - 1][0] + mono_curr = train_ugp_times[i][0] + size_prev = train_ugp_times[i - 1][1] + size_curr = train_ugp_times[i][1] delta_ns = mono_curr - mono_prev wall_clock_ns = train_wall_epoch_ns + (mono_curr - mono_start) - ugp_rows.append([wall_clock_ns, delta_ns, mono_curr]) + delta_rows = max(0, size_curr - size_prev) + ugp_rows.append([wall_clock_ns, delta_ns, delta_rows]) ugp_events = new_table([ string_col("origin", ["deephaven-engine"] * len(ugp_rows)), string_col("type", ["ugp.delta"] * len(ugp_rows)), long_col("start_ns", [r[0] for r in ugp_rows]), long_col("duration_ns", [r[1] for r in ugp_rows]), - string_col("name", ["elapsedTime"] * len(ugp_rows)), + string_col("name", ["duration_rows"] * len(ugp_rows)), double_col("value", [float(r[2]) for r in ugp_rows]), ]) From 9b326e029286960728a45f5faa34e48951105553 Mon Sep 17 00:00:00 2001 From: stanbrub Date: Thu, 23 Apr 2026 16:29:34 -0600 Subject: [PATCH 22/25] turn on JFR metrics --- .../deephaven/benchmark/tests/train/TrainTestRunner.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index caaae81b..290e6580 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -42,10 +42,10 @@ public void test(String name, long maxExpectedRowCount, String operation, String if (staticRowFactor <= 0 && incRowFactor <= 0) throw new IllegalStateException("At least one of staticRowFactor or incRowFactor must be > 0"); - // setupQueries(startJfrQuery); - setupQueries.add(startUgpQuery); - teardownQueries.add(stopUgpQuery); - // teardownQueries(stopJfrQuery); + setupQueries.add(startJfrQuery); + // setupQueries.add(startUgpQuery); + // teardownQueries.add(stopUgpQuery); + teardownQueries.add(stopJfrQuery); if (staticRowFactor > 0) test(name, maxExpectedRowCount, operation, staticRowFactor, true, loadColumns); From 7fe14ccaf0d9616bd5110cb780eefe8a2014078f Mon Sep 17 00:00:00 2001 From: stanbrub Date: Thu, 23 Apr 2026 17:22:00 -0600 Subject: [PATCH 23/25] Turn off Inc runs --- .../io/deephaven/benchmark/tests/train/TrainTestRunner.java | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index 290e6580..4b70368a 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -50,8 +50,8 @@ public void test(String name, long maxExpectedRowCount, String operation, String if (staticRowFactor > 0) test(name, maxExpectedRowCount, operation, staticRowFactor, true, loadColumns); - if (incRowFactor > 0) - test(name, maxExpectedRowCount, operation, incRowFactor, false, loadColumns); +// if (incRowFactor > 0) +// test(name, maxExpectedRowCount, operation, incRowFactor, false, loadColumns); } void test(String name, long maxExpectedRowCount, String operation, double rowFactor, boolean isStatic, From 5e1d59c531073d2a5c2f5475933b87122d48453a Mon Sep 17 00:00:00 2001 From: stanbrub Date: Tue, 5 May 2026 16:23:08 -0600 Subject: [PATCH 24/25] Added ss_log budget metric --- .../tests/standard/StandardTestRunner.java | 2 + .../tests/train/TrainTestRunner.java | 41 +++++++++++++------ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index 31388404..ed364fc2 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -348,6 +348,8 @@ String getIncQuery(String name, String operation, long rowCount, String... loadC long_col("end_clock_nanos", [end_clock]), ]) ${teardownQueries} + print("STANDARD EVENTS: ", f'start_ns > {begin_clock}L', f'start_ns < {end_clock}L') + standard_events = standard_events.where([f'start_ns > {begin_clock}L', f'start_ns < {end_clock}L']) """; var read = getReadOperation(incFactor, rowCount, loadColumns); return populateQuery(name, incQuery, operation, read, loadColumns); diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java index 4b70368a..374d6a23 100644 --- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java @@ -43,15 +43,17 @@ public void test(String name, long maxExpectedRowCount, String operation, String throw new IllegalStateException("At least one of staticRowFactor or incRowFactor must be > 0"); setupQueries.add(startJfrQuery); - // setupQueries.add(startUgpQuery); - // teardownQueries.add(stopUgpQuery); + setupQueries.add(startUgpQuery); + teardownQueries.add(stopUgpQuery); teardownQueries.add(stopJfrQuery); + + operation += "\ntrain_ugp_listener = listen(result, train_ugp_update)"; - if (staticRowFactor > 0) - test(name, maxExpectedRowCount, operation, staticRowFactor, true, loadColumns); +// if (staticRowFactor > 0) +// test(name, maxExpectedRowCount, operation, staticRowFactor, true, loadColumns); -// if (incRowFactor > 0) -// test(name, maxExpectedRowCount, operation, incRowFactor, false, loadColumns); + if (incRowFactor > 0) + test(name, maxExpectedRowCount, operation, incRowFactor, false, loadColumns); } void test(String name, long maxExpectedRowCount, String operation, double rowFactor, boolean isStatic, @@ -74,7 +76,7 @@ void test(String name, long maxExpectedRowCount, String operation, double rowFac 
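        // Hand the recorded teardown queries to the delegate; they run after the measured operation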
teardownQueries.forEach(delegate::addTeardownQuery); delegate.test(name, maxExpectedRowCount, operation, loadColumns); } - + static final String startJfrQuery = """ import jpy Recording = jpy.get_type("jdk.jfr.Recording") @@ -135,7 +137,7 @@ raise TypeError(f"Unsupported JFR value type: {type(val)}") for i in range(events.size()): e = events.get(i) etype = e.getEventType().getName() - start = e.getStartTime().getEpochSecond() * 1000000000 + e.getStartTime().getNano(); + start = e.getStartTime().getEpochSecond() * 1000000000 + e.getStartTime().getNano() if etype == 'jdk.GarbageCollection': duration = getNanoValue(e, 'duration') @@ -163,19 +165,17 @@ raise TypeError(f"Unsupported JFR value type: {type(val)}") """; static final String startUgpQuery = """ - from deephaven import time_table + from deephaven import time_table, perfmon from deephaven.table_listener import listen import time + ss_log = perfmon.server_state_log() if 'train_ugp_listener' in globals(): train_ugp_listener.stop() train_wall_epoch_ns = time.time_ns() train_ugp_times = [(time.perf_counter_ns(), 0)] - train_time_table = time_table("PT0.001S").tail(1) def train_ugp_update(update, is_replay): train_ugp_times.append((time.perf_counter_ns(), ${mainTable}.size)) - - train_ugp_listener = listen(train_time_table, train_ugp_update) """; static final String stopUgpQuery = """ @@ -203,5 +203,22 @@ def train_ugp_update(update, is_replay): ]) standard_events = merge([standard_events, ugp_events]) + + ss_log = perfmon.server_state_log().snapshot() + if ss_log.size > 0: + ss_rows = [] + for row in ss_log.iter_dict(): + start = row['IntervalStartTime'].getEpochSecond() * 1000000000 + row['IntervalStartTime'].getNano() + ss_rows.append((start, row['IntervalCollectionTimeMicros'] * 1000, row['IntervalUGPCyclesOnBudget'])) + + ss_events = new_table([ + string_col("origin", ["deephaven-engine"] * len(ss_rows)), + string_col("type", ["server_state_log"] * len(ss_rows)), + long_col("start_ns", [r[0] for r in ss_rows]), + long_col("duration_ns", [r[1] for r in ss_rows]), + string_col("name", ["cycles.on.budget"] * len(ss_rows)), + double_col("value", [r[2] for r in ss_rows]), + ]) + standard_events = merge([standard_events, ss_events]) """; } From a1316d486ec24477a46677be8768528fe4f2f05c Mon Sep 17 00:00:00 2001 From: stanbrub Date: Wed, 6 May 2026 17:43:57 -0600 Subject: [PATCH 25/25] Added runner setting for auto tune cycle factor --- .../tests/standard/StandardTestRunner.java | 17 +++++++++++++++-- .../benchmark/tests/train/TrainTestRunner.java | 2 ++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java index ed364fc2..a456a483 100644 --- a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java +++ b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java @@ -34,9 +34,10 @@ final public class StandardTestRunner { private String mainTable = "source"; private Bench api; private Controller controller; + private int rowCountFactor = 1; private int staticFactor = 1; private int incFactor = 1; - private int rowCountFactor = 1; + private float incCycleFactor = 1.0f; private boolean useCachedSource = true; private boolean useLocalParquet = false; @@ -179,6 +180,16 @@ public void setScaleFactors(int staticFactor, int incFactor) { this.incFactor = incFactor; } + /** + * Set the cycle factor used for the autotune incremental release filter. 
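+     * Only the incremental query built by getIncQuery uses this value; the static query path does not.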
+     * The factor is a fraction that governs how many rows the filter attempts to release per cycle,
+     * relative to the UGP cycle time.
+     *
+     * @param incCycleFactor the cycle fraction applied by the incremental release filter
+     */
+    public void setIncCycleFactor(float incCycleFactor) {
+        this.incCycleFactor = incCycleFactor;
+    }
+
     /**
      * Run a single operation test through the Bench API with no upper bound expected on the resulting row count
      *
@@ -312,7 +323,7 @@ String getIncQuery(String name, String operation, long rowCount, String... loadC
        ${setupQueries}

        autotune = jpy.get_type('io.deephaven.engine.table.impl.select.AutoTuningIncrementalReleaseFilter')
-       source_filter = autotune(0, 1000000, 1.0, True)
+       source_filter = autotune(0, 1000000, ${incCycleFactor}, True)
        ${mainTable} = ${mainTable}.where(source_filter)
        if right:
            right_filter = autotune(0, 1010000, 1.0, True)
@@ -365,6 +376,7 @@ String populateQuery(String name, String query, String operation, String read, S
         query = query.replace("${teardownQueries}", String.join("\n", teardownQueries));
         query = query.replace("${logOperationBegin}", getLogSnippet("Begin", name));
         query = query.replace("${logOperationEnd}", getLogSnippet("End", name));
+        query = query.replace("${incCycleFactor}", "" + incCycleFactor);
         query = query.replace("${mainTable}", mainTable);
         return query;
     }
@@ -393,6 +405,7 @@ Result runTest(String name, String warmupQuery, String mainQuery) {
             metrics.set("static.factor", staticFactor);
             metrics.set("inc.factor", incFactor);
             metrics.set("row.factor", rowCountFactor);
+            metrics.set("inc.cycle.factor", incCycleFactor);
             api.metrics().add(metrics);
         }).fetchAfter("standard_events", table -> {
             api.events().add(table);
diff --git a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java
index 374d6a23..25f46349 100644
--- a/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java
+++ b/src/it/java/io/deephaven/benchmark/tests/train/TrainTestRunner.java
@@ -15,6 +15,7 @@
  */
 final public class TrainTestRunner {
     static final int maxRowFactor = 850;
+    static final float incCycleFactor = 0.95f;
     final Object testInst;
     final List setupQueries = new ArrayList<>();
     final List teardownQueries = new ArrayList<>();
@@ -65,6 +66,7 @@ void test(String name, long maxExpectedRowCount, String operation, double rowFac
         delegate.setRowFactor(maxRowFactor);
         delegate.tables(tableNames);
         delegate.setScaleFactors(isStatic ? 1 : 0, isStatic ? 0 : 1);
+        delegate.setIncCycleFactor(incCycleFactor);

         var headQuery = """
             ${mainTable} = ${mainTable}.head(${trainRowCount})
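             # Java fills in ${trainRowCount} as (long) (baseRowCount * rowFactor) before this query runs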