diff --git a/hyrise/Dockerfile b/hyrise/Dockerfile
new file mode 100644
index 000000000..d0bc92ae3
--- /dev/null
+++ b/hyrise/Dockerfile
@@ -0,0 +1,92 @@
+# Two-stage image for the ClickBench Hyrise entry: the "build" stage compiles
+# hyriseServer from source; the final stage carries only the server binary and
+# the shared libraries installed/copied below.
+FROM ubuntu:25.04 AS build
+
+# ARG instead of ENV: the value is exported to the RUN steps of this stage
+# without being persisted in the image environment.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV HYRISE_HEADLESS_SETUP=1
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        autoconf \
+        bash-completion \
+        bc \
+        ca-certificates \
+        clang-19 \
+        clang-20 \
+        cmake \
+        curl \
+        dos2unix \
+        g++-13 \
+        g++-15 \
+        gcc-13 \
+        gcc-15 \
+        git \
+        libboost-all-dev \
+        libhwloc-dev \
+        libncurses-dev \
+        libnuma-dev \
+        libnuma1 \
+        libpq-dev \
+        libreadline-dev \
+        libsqlite3-dev \
+        libtbb-dev \
+        lld-20 \
+        llvm-20 \
+        lsb-release \
+        make \
+        ninja-build \
+        parallel \
+        postgresql-server-dev-all \
+        python3 \
+        python3-pip \
+        software-properties-common \
+        sudo \
+    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-15 90 --slave /usr/bin/g++ g++ /usr/bin/g++-15 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Pin a specific Hyrise revision for reproducibility (defaults to master).
+ARG HYRISE_REF=master
+# Set NO_LTO=TRUE to disable Link Time Optimization (much faster build, slightly
+# lower runtime performance — useful for development/testing). Default: LTO on.
+ARG NO_LTO=FALSE
+
+# Clone straight into the working directory (WORKDIR creates it empty) so no
+# "cd" is needed inside the RUN step.
+WORKDIR /opt/hyrise
+RUN git clone https://github.com/hyrise/hyrise.git . \
+    && git checkout "${HYRISE_REF}" \
+    && git submodule update --jobs 8 --init --recursive --depth 1
+
+WORKDIR /opt/hyrise/cmake-build-release
+RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DNO_LTO=${NO_LTO} .. \
+    && ninja hyriseServer
+
+# Runtime stage.
+FROM ubuntu:25.04
+# Build-time only: keeps noninteractive mode out of the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        libboost-system1.83.0 \
+        libboost-thread1.83.0 \
+        libhwloc15 \
+        libnuma1 \
+        libsqlite3-0 \
+        libtbb12 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /opt/hyrise/cmake-build-release/hyriseServer /usr/local/bin/hyriseServer
+COPY --from=build /opt/hyrise/cmake-build-release/lib/libhyrise_impl.so /usr/local/lib/
+COPY --from=build /opt/hyrise/cmake-build-release/third_party/jemalloc/lib/libjemalloc.so.2 /usr/local/lib/
+# Refresh the linker cache so the libraries copied into /usr/local/lib resolve.
+RUN ldconfig
+
+EXPOSE 5432
+# CMD supplies 5432 as the default argument (presumably the listen port).
+ENTRYPOINT ["/usr/local/bin/hyriseServer"]
+CMD ["5432"]
diff --git a/hyrise/benchmark.sh b/hyrise/benchmark.sh
new file mode 100755
index 000000000..eda2b0b2d
--- /dev/null
+++ b/hyrise/benchmark.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Thin shim — actual flow is in lib/benchmark-common.sh.
+#
+# Hyrise has no on-disk persistence: every restart resurfaces with an empty
+# catalog and the dataset must be re-loaded into RAM. BENCH_DURABLE=no makes
+# the driver re-run ./load on every cold cycle (and roll that wall-clock into
+# the cold-try timing) so each "cold" number genuinely measures
+# load+query against a fresh in-memory dataset.
+export BENCH_DOWNLOAD_SCRIPT="download-hits-csv"
+export BENCH_DURABLE=no
+exec ../lib/benchmark-common.sh
diff --git a/hyrise/check b/hyrise/check
new file mode 100755
index 000000000..4acaef658
--- /dev/null
+++ b/hyrise/check
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+
+psql -h 127.0.0.1 -p 5432 -U postgres -c 'SELECT 1' >/dev/null
diff --git a/hyrise/data-size b/hyrise/data-size
new file mode 100755
index 000000000..d24cf0233
--- /dev/null
+++ b/hyrise/data-size
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -euo pipefail
+
+# Hyrise has no on-disk persistence; report the total in-memory segment
+# size from meta_segments. Strip any whitespace so the driver's strict
+# digits-only regex check passes. pipefail makes a psql failure fail the
+# script instead of being masked by tr's exit status.
+psql -h 127.0.0.1 -p 5432 -U postgres -t -A \
+    -c "SELECT SUM(estimated_size_in_bytes) FROM meta_segments WHERE table_name = 'hits';" \
+    | tr -d '[:space:]'
+echo
diff --git a/hyrise/hits.csv.json b/hyrise/hits.csv.json
new file mode 100644
index 000000000..e509bda8f
--- /dev/null
+++ b/hyrise/hits.csv.json
@@ -0,0 +1,116 @@
+{
+  "config": {
+    "rfc_mode": true,
+    "separator": ",",
+    "quote": "\"",
+    "escape": "\"",
+    "delimiter": "\n"
+  },
+  "columns": [
+    {"name": "WatchID", "type": "long", "nullable": false},
+    {"name": "JavaEnable", "type": "int", "nullable": false},
+    {"name": "Title", "type": "string", "nullable": false},
+    {"name": "GoodEvent", "type": "int", "nullable": false},
+    {"name": "EventTime", "type": "string", "nullable": false},
+    {"name": "EventDate", "type": "string", "nullable": false},
+    {"name": "CounterID", "type": "int", "nullable": false},
+    {"name": "ClientIP", "type": "int", "nullable": false},
+    {"name": "RegionID", "type": "int", "nullable": false},
+    {"name": "UserID", "type": "long", "nullable": false},
+    {"name": "CounterClass", "type": "int", "nullable": false},
+    {"name": "OS", "type": "int", "nullable": false},
+    {"name": "UserAgent", "type": "int", "nullable": false},
+    {"name": "URL", "type": "string", "nullable": false},
+    {"name": "Referer", "type": "string", "nullable": false},
+    {"name": "IsRefresh", "type": "int", "nullable": false},
+    {"name": "RefererCategoryID", "type": "int", "nullable": false},
+    {"name": "RefererRegionID", "type": "int", "nullable": false},
+    {"name": "URLCategoryID", "type": "int", "nullable": false},
+    {"name": "URLRegionID", "type": "int", "nullable": false},
+    {"name": "ResolutionWidth", "type": "int", "nullable": false},
+    {"name": "ResolutionHeight", "type": "int", "nullable": false},
+    {"name": "ResolutionDepth", "type": "int", "nullable": false},
+    {"name": "FlashMajor", "type": "int", "nullable": false},
+    {"name": "FlashMinor", "type": "int", "nullable": false},
+    {"name": "FlashMinor2", "type": "string", "nullable": false},
+    {"name": "NetMajor", "type": "int", "nullable": false},
+    {"name": "NetMinor", "type": "int", "nullable": false},
+    {"name": "UserAgentMajor", "type": "int", "nullable": false},
+    {"name": "UserAgentMinor", "type": "string", "nullable": false},
+    {"name": "CookieEnable", "type": "int", "nullable": false},
+    {"name": "JavascriptEnable", "type": "int", "nullable": false},
+    {"name": "IsMobile", "type": "int", "nullable": false},
+    {"name": "MobilePhone", "type": "int", "nullable": false},
+    {"name": "MobilePhoneModel", "type": "string", "nullable": false},
+    {"name": "Params", "type": "string", "nullable": false},
+    {"name": "IPNetworkID", "type": "int", "nullable": false},
+    {"name": "TraficSourceID", "type": "int", "nullable": false},
+    {"name": "SearchEngineID", "type": "int", "nullable": false},
+    {"name": "SearchPhrase", "type": "string", "nullable": false},
+    {"name": "AdvEngineID", "type": "int", "nullable": false},
+    {"name": "IsArtifical", "type": "int", "nullable": false},
+    {"name": "WindowClientWidth", "type": "int", "nullable": false},
+    {"name": "WindowClientHeight", "type": "int", "nullable": false},
+    {"name": "ClientTimeZone", "type": "int", "nullable": false},
+    {"name": "ClientEventTime", "type": "string", "nullable": false},
+    {"name": "SilverlightVersion1", "type": "int", "nullable": false},
+    {"name": "SilverlightVersion2", "type": "int", "nullable": false},
+    {"name": "SilverlightVersion3", "type": "int", "nullable": false},
+    {"name": "SilverlightVersion4", "type": "int", "nullable": false},
+    {"name": "PageCharset", "type": "string", "nullable": false},
+    {"name": "CodeVersion", "type": "int", "nullable": false},
+    {"name": "IsLink", "type": "int", "nullable": false},
+    {"name": "IsDownload", "type": "int", "nullable": false},
+    {"name": "IsNotBounce", "type": "int", "nullable": false},
+    {"name": "FUniqID", "type": "long", "nullable": false},
+    {"name": "OriginalURL", "type": "string", "nullable": false},
+    {"name": "HID", "type": "int", "nullable": false},
+    {"name": "IsOldCounter", "type": "int", "nullable": false},
+    {"name": "IsEvent", "type": "int", "nullable": false},
+    {"name": "IsParameter", "type": "int", "nullable": false},
+    {"name": "DontCountHits", "type": "int", "nullable": false},
+    {"name": "WithHash", "type": "int", "nullable": false},
+    {"name": "HitColor", "type": "string", "nullable": false},
+    {"name": "LocalEventTime", "type": "string", "nullable": false},
+    {"name": "Age", "type": "int", "nullable": false},
+    {"name": "Sex", "type": "int", "nullable": false},
+    {"name": "Income", "type": "int", "nullable": false},
+    {"name": "Interests", "type": "int", "nullable": false},
+    {"name": "Robotness", "type": "int", "nullable": false},
+    {"name": "RemoteIP", "type": "int", "nullable": false},
+    {"name": "WindowName", "type": "int", "nullable": false},
+    {"name": "OpenerName", "type": "int", "nullable": false},
+    {"name": "HistoryLength", "type": "int", "nullable": false},
+    {"name": "BrowserLanguage", "type": "string", "nullable": false},
+    {"name": "BrowserCountry", "type": "string", "nullable": false},
+    {"name": "SocialNetwork", "type": "string", "nullable": false},
+    {"name": "SocialAction", "type": "string", "nullable": false},
+    {"name": "HTTPError", "type": "int", "nullable": false},
+    {"name": "SendTiming", "type": "int", "nullable": false},
+    {"name": "DNSTiming", "type": "int", "nullable": false},
+    {"name": "ConnectTiming", "type": "int", "nullable": false},
+    {"name": "ResponseStartTiming", "type": "int", "nullable": false},
+    {"name": "ResponseEndTiming", "type": "int", "nullable": false},
+    {"name": "FetchTiming", "type": "int", "nullable": false},
+    {"name": "SocialSourceNetworkID", "type": "int", "nullable": false},
+    {"name": "SocialSourcePage", "type": "string", "nullable": false},
+    {"name": "ParamPrice", "type": "long", "nullable": false},
+    {"name": "ParamOrderID", "type": "string", "nullable": false},
+    {"name": "ParamCurrency", "type": "string", "nullable": false},
+    {"name": "ParamCurrencyID", "type": "int", "nullable": false},
+    {"name": "OpenstatServiceName", "type": "string", "nullable": false},
+    {"name": "OpenstatCampaignID", "type": "string", "nullable": false},
+    {"name": "OpenstatAdID", "type": "string", "nullable": false},
+    {"name": "OpenstatSourceID", "type": "string", "nullable": false},
+    {"name": "UTMSource", "type": "string", "nullable": false},
+    {"name": "UTMMedium", "type": "string", "nullable": false},
+    {"name": "UTMCampaign", "type": "string", "nullable": false},
+    {"name": "UTMContent", "type": "string", "nullable": false},
+    {"name": "UTMTerm", "type": "string", "nullable": false},
+    {"name": "FromTag", "type": "string", "nullable": false},
+    {"name": "HasGCLID", "type": "int", "nullable": false},
+    {"name": "RefererHash", "type": "long", "nullable": false},
+    {"name": "URLHash", "type": "long", "nullable": false},
+    {"name": "CLID", "type": "int", "nullable": false}
+  ]
+}
diff --git a/hyrise/install b/hyrise/install
new file mode 100755
index 000000000..3437caea6
--- /dev/null
+++ b/hyrise/install
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -eu
+# Installs Docker, the psql client and gzip, builds the image, creates ./data.
+sudo apt-get update -y
+sudo apt-get install -y docker.io postgresql-client gzip
+
+# Hyrise has no upstream binary distribution and the build requires a
+# recent toolchain (gcc-15 / clang-20). Build inside Docker on top of
+# Ubuntu 25.04, then ship just the binary and its runtime libs in a slim
+# image.
+sudo docker build -t clickbench-hyrise .
+
+# The bind-mounted /data dir must exist before docker run (created here
+# instead of in ./load so ./start can be called before the first ./load
+# without erroring on a missing mount source).
+mkdir -p data
+chmod 777 data
diff --git a/hyrise/load b/hyrise/load
new file mode 100755
index 000000000..b1c81970b
--- /dev/null
+++ b/hyrise/load
@@ -0,0 +1,22 @@
+#!/bin/bash
+set -eu
+# Stages the dataset under data/ (if a fresh hits.csv exists), then COPYs it in.
+# Stage hits.csv next to hits.csv.json under data/. The bind mount on
+# the container exposes this dir as /data:ro.
+#
+# Idempotent: BENCH_DURABLE=no triggers ./load again on every cold cycle.
+# The first invocation moves hits.csv (delivered into cwd by
+# download-hits-csv) into data/; subsequent invocations reuse the staged
+# file because the dataset is many GB and re-downloading it per cycle
+# would blow up the run time without changing the measurement.
+if [ -f hits.csv ]; then
+    cp hits.csv.json data/
+    mv -f hits.csv data/hits.csv
+fi
+
+# COPY into a not-yet-existing table works because hits.csv.json next to
+# the data file tells Hyrise's CSV parser the column types — running
+# CREATE TABLE first and then COPY trips an internal Hyrise assertion.
+psql -h 127.0.0.1 -p 5432 -U postgres -v ON_ERROR_STOP=1 -q \
+    -c "COPY hits FROM '/data/hits.csv' WITH (FORMAT CSV);"
+sync
diff --git a/hyrise/queries.sql b/hyrise/queries.sql
new file mode 100644
index 000000000..e29663a7b
--- /dev/null
+++ b/hyrise/queries.sql
@@ -0,0 +1,43 @@
+SELECT COUNT(*) FROM hits;
+SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
+SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
+SELECT AVG(UserID) FROM hits;
+SELECT COUNT(DISTINCT UserID) FROM hits;
+SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
+SELECT MIN(EventDate), MAX(EventDate) FROM hits;
+SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
+SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
+SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
+SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
+SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
+SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, EXTRACT(MINUTE FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(LENGTH(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(LENGTH(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY 
k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), 
SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 
OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/hyrise/query b/hyrise/query new file mode 100755 index 000000000..cbfc4cef0 --- /dev/null +++ b/hyrise/query @@ -0,0 +1,33 @@ +#!/bin/bash +# Reads a SQL query from stdin, runs it via psql against Hyrise. +# Stdout: query result. +# Stderr: query runtime in fractional seconds on the last line (parsed +# from psql's `\timing` "Time: ms" output). +# Exit non-zero on error — the driver records `null` for failing queries. +# +# Hyrise's SQL coverage is limited: LENGTH, REGEXP_REPLACE, DATE_TRUNC, +# and OFFSET all fail (Q28, Q29, Q39–Q43). 
With ON_ERROR_STOP=1 those
+# raise an ERROR and psql exits non-zero — we propagate that exit code
+# so the driver records `null` rather than the `\timing` line that psql
+# still emits for the meta-command.
+set -e
+
+query=$(cat)
+
+raw=$(psql -h 127.0.0.1 -p 5432 -U postgres -v ON_ERROR_STOP=1 -t \
+    -c '\timing' -c "$query" 2>&1) && exit_code=0 || exit_code=$?
+
+if [ "$exit_code" -ne 0 ] || printf '%s\n' "$raw" | grep -qE '^(ERROR|FATAL|PANIC):|psql: error'; then
+    printf '%s\n' "$raw" >&2
+    exit 1
+fi
+
+printf '%s\n' "$raw" | grep -v '^Time:'
+
+ms=$(printf '%s\n' "$raw" | grep -oP 'Time:\s*\K[0-9.]+' | tail -n1)
+if [ -z "$ms" ]; then
+    echo "no Time: in psql output" >&2
+    exit 1
+fi
+
+awk -v m="$ms" 'BEGIN { printf "%.3f\n", m / 1000 }' >&2
diff --git a/hyrise/start b/hyrise/start
new file mode 100755
index 000000000..809fc08d8
--- /dev/null
+++ b/hyrise/start
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -eu
+
+CONTAINER_NAME=${CONTAINER_NAME:-hyrise}
+
+# If already running, leave it alone — the cold-cycle driver invokes
+# ./start after ./stop, but ./check is the authoritative readiness signal
+# so this idempotency just avoids a redundant restart.
+if [ "$(sudo docker inspect -f '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null || echo false)" = "true" ]; then
+    exit 0
+fi
+
+# Recreate from scratch every cold cycle. Hyrise has no on-disk state, so
+# `docker start` on a previously-stopped container would give us an empty
+# DB anyway — recreating means we don't have to worry about a stale
+# container holding the port or a stale bind-mount path.
+sudo docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
+
+sudo docker run -d --name "$CONTAINER_NAME" \
+    -p 5432:5432 \
+    -v "$(pwd)/data:/data:ro" \
+    --ulimit nofile=1048576:1048576 \
+    clickbench-hyrise >/dev/null
diff --git a/hyrise/stop b/hyrise/stop
new file mode 100755
index 000000000..7cfc3487c
--- /dev/null
+++ b/hyrise/stop
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Stops the Hyrise container. Honours the same CONTAINER_NAME override as
+# ./start (default: hyrise) so both scripts always act on the same container.
+# Best-effort: a missing or already-stopped container is not an error.
+
+sudo docker stop "${CONTAINER_NAME:-hyrise}" >/dev/null 2>&1 || true
diff --git a/hyrise/template.json b/hyrise/template.json
new file mode 100644
index 000000000..6800d778d
--- /dev/null
+++ b/hyrise/template.json
@@ -0,0 +1,12 @@
+{
+  "system": "Hyrise",
+  "proprietary": "no",
+  "hardware": "cpu",
+  "tuned": "no",
+  "tags": [
+    "C++",
+    "column-oriented",
+    "in-memory",
+    "PostgreSQL compatible"
+  ]
+}