+
+
+
+`
+}
+
+const parseRedirectsYaml = (source: string): RedirectEntry[] => {
+ const parsed = yaml.load(source)
+ if (parsed === null || parsed === undefined) return []
+ if (typeof parsed !== 'object' || Array.isArray(parsed)) {
+ throw new Error('redirects.yml must be a YAML mapping at the top level')
+ }
+ const entries: RedirectEntry[] = []
+ for (const [from, to] of Object.entries(parsed as Record<string, unknown>)) {
+ if (typeof to !== 'string') {
+ throw new Error(`redirect target for "${from}" must be a string, got ${typeof to}`)
+ }
+ if (!from.startsWith('/')) {
+ throw new Error(`redirect source "${from}" must start with a leading slash`)
+ }
+ entries.push({ from, to })
+ }
+ return entries
+}
+
+const stripLeadingSlash = (p: string): string => (p.startsWith('/') ? p.slice(1) : p)
+const stripTrailingSlash = (p: string): string => (p.endsWith('/') ? p.slice(0, -1) : p)
+
+interface RedirectsPluginOptions {
+ yamlPath: string
+ outDir: string
+}
+
+export const redirectsPlugin = (options: RedirectsPluginOptions): Plugin => {
+ // VitePress emits both SSR and client bundles, so closeBundle fires twice.
+ // Only emit stubs on the first call.
+ let written = false
+ return {
+ name: 'zarr-redirects',
+ apply: 'build',
+ closeBundle() {
+ if (written) return
+ written = true
+
+ const yamlSource = readFileSync(options.yamlPath, 'utf8')
+ const entries = parseRedirectsYaml(yamlSource)
+
+ for (const { from, to } of entries) {
+ const cleaned = stripTrailingSlash(stripLeadingSlash(from))
+ const outPath = cleaned === ''
+ ? join(options.outDir, 'index.html')
+ : join(options.outDir, cleaned, 'index.html')
+
+ if (existsSync(outPath)) {
+ throw new Error(
+ `Redirect "${from}" collides with an existing build output at ${outPath}. ` +
+ `Either remove the redirect or rename the page that produced ${outPath}.`,
+ )
+ }
+
+ mkdirSync(dirname(outPath), { recursive: true })
+ writeFileSync(outPath, renderRedirectHtml(to), 'utf8')
+ }
+
+ console.log(`[zarr-redirects] wrote ${entries.length} redirect stubs`)
+ },
+ }
+}
+
+export const defaultPluginOptions = (rootDir: string): RedirectsPluginOptions => ({
+ yamlPath: resolve(rootDir, '.vitepress/redirects.yml'),
+ outDir: resolve(rootDir, '.vitepress/dist'),
+})
diff --git a/.vitepress/redirects.yml b/.vitepress/redirects.yml
new file mode 100644
index 0000000..710f35a
--- /dev/null
+++ b/.vitepress/redirects.yml
@@ -0,0 +1,36 @@
+# Each entry maps a path on zarr.dev to an absolute or relative target URL.
+# A buildEnd hook turns this into static meta-refresh stub HTML files
+# under .vitepress/dist/<path>/index.html.
+
+# numcodecs codec docs (mirrors the deleted numcodecs_redirects/ verbatim)
+/numcodecs/adler32: https://numcodecs.readthedocs.io/en/stable/checksum32.html#adler32
+/numcodecs/astype: https://numcodecs.readthedocs.io/en/stable/astype.html
+/numcodecs/bitround: https://numcodecs.readthedocs.io/en/stable/bitround.html
+/numcodecs/blosc: https://numcodecs.readthedocs.io/en/stable/blosc.html
+/numcodecs/bz2: https://numcodecs.readthedocs.io/en/stable/bz2.html
+/numcodecs/crc32: https://numcodecs.readthedocs.io/en/stable/checksum32.html#crc32
+/numcodecs/delta: https://numcodecs.readthedocs.io/en/stable/delta.html
+/numcodecs/fixedscaleoffset: https://numcodecs.readthedocs.io/en/stable/fixedscaleoffset.html
+/numcodecs/fletcher32: https://numcodecs.readthedocs.io/en/stable/checksum32.html#fletcher32
+/numcodecs/gzip: https://numcodecs.readthedocs.io/en/stable/gzip.html
+/numcodecs/jenkins_lookup3: https://numcodecs.readthedocs.io/en/stable/checksum32.html#jenkinslookup3
+/numcodecs/lz4: https://numcodecs.readthedocs.io/en/stable/lz4.html
+/numcodecs/lzma: https://numcodecs.readthedocs.io/en/stable/lzma.html
+/numcodecs: https://numcodecs.readthedocs.io/en/stable/
+/numcodecs/packbits: https://numcodecs.readthedocs.io/en/stable/packbits.html
+/numcodecs/pcodec: https://numcodecs.readthedocs.io/en/stable/pcodec.html
+/numcodecs/quantize: https://numcodecs.readthedocs.io/en/stable/quantize.html
+/numcodecs/shuffle: https://numcodecs.readthedocs.io/en/stable/shuffle.html
+/numcodecs/zfpy: https://numcodecs.readthedocs.io/en/stable/zfpy.html
+/numcodecs/zlib: https://numcodecs.readthedocs.io/en/stable/zlib.html
+/numcodecs/zstd: https://numcodecs.readthedocs.io/en/stable/zstd.html
+
+# URL hygiene: catch any external link to /office_hours/
+/office_hours/: /office-hours/
+
+# Restored behavior of the deleted about/index.html stub
+/about: /
+
+# Vestigial 2019 blog posts -> blog repo
+/2019/05/02/zarr-2.3-release/: https://zarr.dev/blog/release-23/
+/2019/06/19/zarr-v3-update/: https://zarr.dev/blog/v3-update/
diff --git a/.vitepress/theme/custom.css b/.vitepress/theme/custom.css
new file mode 100644
index 0000000..1032454
--- /dev/null
+++ b/.vitepress/theme/custom.css
@@ -0,0 +1,82 @@
+:root {
+ --vp-c-brand-1: #952f7c;
+ --vp-c-brand-2: #c93d80;
+ --vp-c-brand-3: #c93d80;
+ --vp-c-brand-soft: rgba(201, 61, 128, 0.14);
+
+ --vp-home-hero-image-background-image: linear-gradient(-45deg, #d97e6a 20%, #c93d80 80%);
+ --vp-home-hero-image-filter: blur(56px);
+}
+
+.dark {
+ --vp-c-brand-1: #f1a8c8;
+ --vp-c-brand-2: #e57aa9;
+ --vp-c-brand-3: #e57aa9;
+ --vp-c-brand-soft: rgba(229, 122, 169, 0.16);
+}
+
+.VPHero .VPImage {
+ border-radius: 14px;
+ box-shadow: 0 24px 64px -16px rgba(149, 47, 124, 0.45),
+ 0 8px 24px -8px rgba(149, 47, 124, 0.25);
+}
+
+@media (min-width: 960px) {
+ .VPHero .image-container {
+ width: 420px;
+ height: 420px;
+ }
+ .VPHero .image-src {
+ max-width: 100%;
+ max-height: 100%;
+ }
+}
+
+.adopters-grid {
+ display: grid;
+ grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
+ gap: 20px;
+ margin: 32px 0;
+}
+
+.adopter-card {
+ display: flex;
+ flex-direction: column;
+ gap: 16px;
+ padding: 24px;
+ border: 1px solid var(--vp-c-divider);
+ border-radius: 12px;
+ background: var(--vp-c-bg-soft);
+ transition: border-color 0.25s ease, transform 0.25s ease;
+}
+
+.adopter-card:hover {
+ border-color: var(--vp-c-brand-1);
+ transform: translateY(-2px);
+}
+
+.adopter-card > p:first-child {
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ height: 80px;
+ margin: 0;
+}
+
+.adopter-card > p:first-child a {
+ display: inline-flex;
+ text-decoration: none;
+}
+
+.adopter-logo {
+ height: 80px;
+ max-width: 100%;
+ width: auto;
+ object-fit: contain;
+}
+
+.adopter-card p {
+ font-size: 0.95em;
+ line-height: 1.6;
+ margin: 0;
+}
diff --git a/.vitepress/theme/index.ts b/.vitepress/theme/index.ts
new file mode 100644
index 0000000..42fe9a9
--- /dev/null
+++ b/.vitepress/theme/index.ts
@@ -0,0 +1,4 @@
+import DefaultTheme from 'vitepress/theme'
+import './custom.css'
+
+export default DefaultTheme
diff --git a/404.html b/404.html
deleted file mode 100644
index c472b4e..0000000
--- a/404.html
+++ /dev/null
@@ -1,24 +0,0 @@
----
-layout: default
----
-
-
-
-
-
404
-
-
Page not found :(
-
The requested page could not be found.
-
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index fcc4994..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,13 +0,0 @@
-FROM ruby:3.4-slim
-
-RUN gem install bundler
-
-COPY . /src
-WORKDIR /src
-
-RUN apt-get update -y
-RUN apt-get install ruby-dev -y
-RUN apt-get install make gcc g++ -y
-RUN bundle install
-RUN cat Gemfile.lock
-ENTRYPOINT bundle exec jekyll serve
diff --git a/Gemfile b/Gemfile
deleted file mode 100644
index 0838a70..0000000
--- a/Gemfile
+++ /dev/null
@@ -1,34 +0,0 @@
-source "https://rubygems.org"
-
-# Hello! This is where you manage which Jekyll version is used to run.
-# When you want to use a different version, change it below, save the
-# file and run `bundle install`. Run Jekyll with `bundle exec`, like so:
-#
-# bundle exec jekyll serve
-#
-# This will help ensure the proper Jekyll version is running.
-# Happy Jekylling!
-
-gem "kramdown-parser-gfm"
-
-# If you want to use GitHub Pages, remove the "gem "jekyll"" above and
-# uncomment the line below. To upgrade, run `bundle update github-pages`.
-gem "github-pages", group: :jekyll_plugins
-
-# If you have any plugins, put them here!
-group :jekyll_plugins do
- gem "jekyll-feed", "~> 0.17"
- gem "jekyll-include-cache", "~> 0.2"
- gem "jekyll-sitemap", "~> 1.4"
-end
-
-# Windows does not include zoneinfo files, so bundle the tzinfo-data gem
-gem "tzinfo-data", platforms: [:mingw, :mswin, :x64_mingw, :jruby]
-
-# Performance-booster for watching directories on Windows
-gem "wdm", "~> 0.2.0" if Gem.win_platform?
-
-
-gem "webrick", "~> 1.8"
-
-gem "jekyll-redirect-from"
diff --git a/Gemfile.lock b/Gemfile.lock
deleted file mode 100644
index 91c6721..0000000
--- a/Gemfile.lock
+++ /dev/null
@@ -1,316 +0,0 @@
-GEM
- remote: https://rubygems.org/
- specs:
- activesupport (8.0.2)
- base64
- benchmark (>= 0.3)
- bigdecimal
- concurrent-ruby (~> 1.0, >= 1.3.1)
- connection_pool (>= 2.2.5)
- drb
- i18n (>= 1.6, < 2)
- logger (>= 1.4.2)
- minitest (>= 5.1)
- securerandom (>= 0.3)
- tzinfo (~> 2.0, >= 2.0.5)
- uri (>= 0.13.1)
- addressable (2.8.7)
- public_suffix (>= 2.0.2, < 7.0)
- base64 (0.2.0)
- benchmark (0.4.0)
- bigdecimal (3.1.9)
- coffee-script (2.4.1)
- coffee-script-source
- execjs
- coffee-script-source (1.12.2)
- colorator (1.1.0)
- commonmarker (0.23.11)
- concurrent-ruby (1.3.5)
- connection_pool (2.5.1)
- csv (3.3.4)
- dnsruby (1.72.4)
- base64 (~> 0.2.0)
- logger (~> 1.6.5)
- simpleidn (~> 0.2.1)
- drb (2.2.1)
- em-websocket (0.5.3)
- eventmachine (>= 0.12.9)
- http_parser.rb (~> 0)
- ethon (0.16.0)
- ffi (>= 1.15.0)
- eventmachine (1.2.7)
- execjs (2.10.0)
- faraday (2.13.0)
- faraday-net_http (>= 2.0, < 3.5)
- json
- logger
- faraday-net_http (3.4.0)
- net-http (>= 0.5.0)
- ffi (1.17.2-aarch64-linux-gnu)
- ffi (1.17.2-aarch64-linux-musl)
- ffi (1.17.2-arm-linux-gnu)
- ffi (1.17.2-arm-linux-musl)
- ffi (1.17.2-arm64-darwin)
- ffi (1.17.2-x86_64-darwin)
- ffi (1.17.2-x86_64-linux-gnu)
- ffi (1.17.2-x86_64-linux-musl)
- forwardable-extended (2.6.0)
- gemoji (4.1.0)
- github-pages (232)
- github-pages-health-check (= 1.18.2)
- jekyll (= 3.10.0)
- jekyll-avatar (= 0.8.0)
- jekyll-coffeescript (= 1.2.2)
- jekyll-commonmark-ghpages (= 0.5.1)
- jekyll-default-layout (= 0.1.5)
- jekyll-feed (= 0.17.0)
- jekyll-gist (= 1.5.0)
- jekyll-github-metadata (= 2.16.1)
- jekyll-include-cache (= 0.2.1)
- jekyll-mentions (= 1.6.0)
- jekyll-optional-front-matter (= 0.3.2)
- jekyll-paginate (= 1.1.0)
- jekyll-readme-index (= 0.3.0)
- jekyll-redirect-from (= 0.16.0)
- jekyll-relative-links (= 0.6.1)
- jekyll-remote-theme (= 0.4.3)
- jekyll-sass-converter (= 1.5.2)
- jekyll-seo-tag (= 2.8.0)
- jekyll-sitemap (= 1.4.0)
- jekyll-swiss (= 1.0.0)
- jekyll-theme-architect (= 0.2.0)
- jekyll-theme-cayman (= 0.2.0)
- jekyll-theme-dinky (= 0.2.0)
- jekyll-theme-hacker (= 0.2.0)
- jekyll-theme-leap-day (= 0.2.0)
- jekyll-theme-merlot (= 0.2.0)
- jekyll-theme-midnight (= 0.2.0)
- jekyll-theme-minimal (= 0.2.0)
- jekyll-theme-modernist (= 0.2.0)
- jekyll-theme-primer (= 0.6.0)
- jekyll-theme-slate (= 0.2.0)
- jekyll-theme-tactile (= 0.2.0)
- jekyll-theme-time-machine (= 0.2.0)
- jekyll-titles-from-headings (= 0.5.3)
- jemoji (= 0.13.0)
- kramdown (= 2.4.0)
- kramdown-parser-gfm (= 1.1.0)
- liquid (= 4.0.4)
- mercenary (~> 0.3)
- minima (= 2.5.1)
- nokogiri (>= 1.16.2, < 2.0)
- rouge (= 3.30.0)
- terminal-table (~> 1.4)
- webrick (~> 1.8)
- github-pages-health-check (1.18.2)
- addressable (~> 2.3)
- dnsruby (~> 1.60)
- octokit (>= 4, < 8)
- public_suffix (>= 3.0, < 6.0)
- typhoeus (~> 1.3)
- html-pipeline (2.14.3)
- activesupport (>= 2)
- nokogiri (>= 1.4)
- http_parser.rb (0.8.0)
- i18n (1.14.7)
- concurrent-ruby (~> 1.0)
- jekyll (3.10.0)
- addressable (~> 2.4)
- colorator (~> 1.0)
- csv (~> 3.0)
- em-websocket (~> 0.5)
- i18n (>= 0.7, < 2)
- jekyll-sass-converter (~> 1.0)
- jekyll-watch (~> 2.0)
- kramdown (>= 1.17, < 3)
- liquid (~> 4.0)
- mercenary (~> 0.3.3)
- pathutil (~> 0.9)
- rouge (>= 1.7, < 4)
- safe_yaml (~> 1.0)
- webrick (>= 1.0)
- jekyll-avatar (0.8.0)
- jekyll (>= 3.0, < 5.0)
- jekyll-coffeescript (1.2.2)
- coffee-script (~> 2.2)
- coffee-script-source (~> 1.12)
- jekyll-commonmark (1.4.0)
- commonmarker (~> 0.22)
- jekyll-commonmark-ghpages (0.5.1)
- commonmarker (>= 0.23.7, < 1.1.0)
- jekyll (>= 3.9, < 4.0)
- jekyll-commonmark (~> 1.4.0)
- rouge (>= 2.0, < 5.0)
- jekyll-default-layout (0.1.5)
- jekyll (>= 3.0, < 5.0)
- jekyll-feed (0.17.0)
- jekyll (>= 3.7, < 5.0)
- jekyll-gist (1.5.0)
- octokit (~> 4.2)
- jekyll-github-metadata (2.16.1)
- jekyll (>= 3.4, < 5.0)
- octokit (>= 4, < 7, != 4.4.0)
- jekyll-include-cache (0.2.1)
- jekyll (>= 3.7, < 5.0)
- jekyll-mentions (1.6.0)
- html-pipeline (~> 2.3)
- jekyll (>= 3.7, < 5.0)
- jekyll-optional-front-matter (0.3.2)
- jekyll (>= 3.0, < 5.0)
- jekyll-paginate (1.1.0)
- jekyll-readme-index (0.3.0)
- jekyll (>= 3.0, < 5.0)
- jekyll-redirect-from (0.16.0)
- jekyll (>= 3.3, < 5.0)
- jekyll-relative-links (0.6.1)
- jekyll (>= 3.3, < 5.0)
- jekyll-remote-theme (0.4.3)
- addressable (~> 2.0)
- jekyll (>= 3.5, < 5.0)
- jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
- rubyzip (>= 1.3.0, < 3.0)
- jekyll-sass-converter (1.5.2)
- sass (~> 3.4)
- jekyll-seo-tag (2.8.0)
- jekyll (>= 3.8, < 5.0)
- jekyll-sitemap (1.4.0)
- jekyll (>= 3.7, < 5.0)
- jekyll-swiss (1.0.0)
- jekyll-theme-architect (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-cayman (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-dinky (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-hacker (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-leap-day (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-merlot (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-midnight (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-minimal (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-modernist (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-primer (0.6.0)
- jekyll (> 3.5, < 5.0)
- jekyll-github-metadata (~> 2.9)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-slate (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-tactile (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-theme-time-machine (0.2.0)
- jekyll (> 3.5, < 5.0)
- jekyll-seo-tag (~> 2.0)
- jekyll-titles-from-headings (0.5.3)
- jekyll (>= 3.3, < 5.0)
- jekyll-watch (2.2.1)
- listen (~> 3.0)
- jemoji (0.13.0)
- gemoji (>= 3, < 5)
- html-pipeline (~> 2.2)
- jekyll (>= 3.0, < 5.0)
- json (2.10.2)
- kramdown (2.4.0)
- rexml
- kramdown-parser-gfm (1.1.0)
- kramdown (~> 2.0)
- liquid (4.0.4)
- listen (3.9.0)
- rb-fsevent (~> 0.10, >= 0.10.3)
- rb-inotify (~> 0.9, >= 0.9.10)
- logger (1.6.6)
- mercenary (0.3.6)
- minima (2.5.1)
- jekyll (>= 3.5, < 5.0)
- jekyll-feed (~> 0.9)
- jekyll-seo-tag (~> 2.1)
- minitest (5.25.5)
- net-http (0.6.0)
- uri
- nokogiri (1.18.8-aarch64-linux-gnu)
- racc (~> 1.4)
- nokogiri (1.18.8-aarch64-linux-musl)
- racc (~> 1.4)
- nokogiri (1.18.8-arm-linux-gnu)
- racc (~> 1.4)
- nokogiri (1.18.8-arm-linux-musl)
- racc (~> 1.4)
- nokogiri (1.18.8-arm64-darwin)
- racc (~> 1.4)
- nokogiri (1.18.8-x86_64-darwin)
- racc (~> 1.4)
- nokogiri (1.18.8-x86_64-linux-gnu)
- racc (~> 1.4)
- nokogiri (1.18.8-x86_64-linux-musl)
- racc (~> 1.4)
- octokit (4.25.1)
- faraday (>= 1, < 3)
- sawyer (~> 0.9)
- pathutil (0.16.2)
- forwardable-extended (~> 2.6)
- public_suffix (5.1.1)
- racc (1.8.1)
- rb-fsevent (0.11.2)
- rb-inotify (0.11.1)
- ffi (~> 1.0)
- rexml (3.4.1)
- rouge (3.30.0)
- rubyzip (2.4.1)
- safe_yaml (1.0.5)
- sass (3.7.4)
- sass-listen (~> 4.0.0)
- sass-listen (4.0.0)
- rb-fsevent (~> 0.9, >= 0.9.4)
- rb-inotify (~> 0.9, >= 0.9.7)
- sawyer (0.9.2)
- addressable (>= 2.3.5)
- faraday (>= 0.17.3, < 3)
- securerandom (0.4.1)
- simpleidn (0.2.3)
- terminal-table (1.8.0)
- unicode-display_width (~> 1.1, >= 1.1.1)
- typhoeus (1.4.1)
- ethon (>= 0.9.0)
- tzinfo (2.0.6)
- concurrent-ruby (~> 1.0)
- unicode-display_width (1.8.0)
- uri (1.0.3)
- webrick (1.9.1)
-
-PLATFORMS
- aarch64-linux-gnu
- aarch64-linux-musl
- arm-linux-gnu
- arm-linux-musl
- arm64-darwin
- x86_64-darwin
- x86_64-linux-gnu
- x86_64-linux-musl
-
-DEPENDENCIES
- github-pages
- jekyll-feed (~> 0.17)
- jekyll-include-cache (~> 0.2)
- jekyll-redirect-from
- jekyll-sitemap (~> 1.4)
- kramdown-parser-gfm
- tzinfo-data
- webrick (~> 1.8)
-
-BUNDLED WITH
- 2.6.8
diff --git a/slides/LICENSE b/NOTICE.md
similarity index 67%
rename from slides/LICENSE
rename to NOTICE.md
index 697d156..ff185ab 100644
--- a/slides/LICENSE
+++ b/NOTICE.md
@@ -1,4 +1,15 @@
-Copyright (C) 2019 Hakim El Hattab, http://hakim.se, and reveal.js contributors
+# Third-Party Notices
+
+This site bundles third-party assets that are governed by their own licenses.
+
+## Hero animation (`public/images/zarr-hero.gif`)
+
+Adapted from `dynamical-zarr-ecmwf.gif` in the [developmentseed/deck.gl-raster](https://github.com/developmentseed/deck.gl-raster) repository (MIT License).
+
+```
+MIT License
+
+Copyright (c) 2025 Development Seed
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -7,13 +18,14 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
\ No newline at end of file
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+```
diff --git a/README.md b/README.md
index 7f9ba1f..199e22e 100644
--- a/README.md
+++ b/README.md
@@ -2,63 +2,23 @@
Main Zarr website hosted at https://zarr.dev
-## Building
+## Local development
-### Option 1: Using Pixi (Recommended)
+This site is built with [VitePress](https://vitepress.dev/). Requires Node.js 20+.
-The easiest way to build and serve the site is using [Pixi](https://pixi.sh), which manages all dependencies automatically.
-
-1. Install Pixi:
-```bash
-curl -fsSL https://pixi.sh/install.sh | bash
-```
-
-2. Start the development server:
```bash
-pixi run dev
-```
-
-The site will be available at http://localhost:4000 with live reload enabled. Pixi will automatically install all dependencies on first run.
-
-Additional commands:
-- `pixi run build` - Build the site for production
-- `pixi run install` - Install/update Ruby gems (if needed manually)
-
-### Option 2: Manual Ruby Installation
-
-To build the webpage manually, you will need a working Ruby installation.
-
-If you are on Ubuntu and want to install the requirements locally,
-follow the steps below as root. Alternatively, see the Dockerfile
-for an alternative.
-
-1. Install `rvm`.
-
+npm ci
+npm run docs:dev # dev server at http://localhost:5173/
+npm run docs:build # production build into .vitepress/dist/
+npm run docs:preview # preview the production build
```
-gpg --keyserver hkp://keyserver.ubuntu.com --recv-keys 409b6b1796c275462a1703113804bb82d39dc0e3 7d2baf1cf37b13e2069d6956105bd0e739499bdb && curl -sSL https://get.rvm.io | bash -s stable
-```
-
-2. Install Ruby
-```
-rvm autolibs disable && rvm install ruby 3.4.3
-```
-
-Note that if the above fails, you may need to specify the path for the `rvm` command in your system.
-
-3. Install `bundler`
-```
-gem install bundler
-```
+## Deployment
-4. In the root of the repository, run:
+The site deploys to GitHub Pages via `.github/workflows/deploy.yml` on every push to `main`. The custom domain `zarr.dev` is preserved by `public/CNAME`, which VitePress copies into the build output.
-```
-bundle install
-```
+## Redirects
-5. Build and serve the site locally:
+Site-wide redirects (e.g. `/numcodecs/blosc` → `numcodecs.readthedocs.io`) are defined as a single YAML map at `.vitepress/redirects.yml`. The build hook in `.vitepress/plugins/redirects.ts` generates one static `<path>/index.html` meta-refresh stub per entry into the build output.
-```
-bundle exec jekyll serve
-```
+To add a redirect: append an entry to `.vitepress/redirects.yml` (`/path: target-url`) and rebuild.
diff --git a/_config.yml b/_config.yml
deleted file mode 100644
index 7c3dfa3..0000000
--- a/_config.yml
+++ /dev/null
@@ -1,89 +0,0 @@
-# Welcome to Jekyll!
-#
-# This config file is meant for settings that affect your whole blog, values
-# which you are expected to set up once and rarely edit after that. If you find
-# yourself editing this file very often, consider using Jekyll's data files
-# feature for the data you need to update frequently.
-#
-# For technical reasons, this file is *NOT* reloaded automatically when you use
-# 'bundle exec jekyll serve'. If you change this file, please restart the server process.
-
-# Site settings
-# These are used to personalize your new site. If you look in the HTML files,
-# you will see them accessed via {{ site.title }}, {{ site.email }}, and so on.
-# You can create any custom variable you would like, and they will be accessible
-# in the templates via {{ site.myvariable }}.
-
-title: Zarr
-subtitle: chunked, compressed, N-dimensional arrays
-
-description: >- # this means to ignore newlines until "baseurl:"
-
- Zarr is an open source project developing specifications and
- software libraries for storage of data that is structured as
- N-dimensional typed arrays (also known as tensors) in a way that is
- compatible with parallel and distributed computing applications.
-
-baseurl: "" # the subpath of your site, e.g. /blog
-
-url: "" # the base hostname & protocol for your site, e.g. http://example.com
-
-analytics:
- provider: "google"
- google:
- tracking_id: "G-BCRR9QE7Z0"
- anonymize_ip: true
-
-github_username: zarr-developers
-gitter_url: https://gitter.im/zarr-developers/community
-
-# Build settings
-markdown: kramdown
-remote_theme: "mmistakes/minimal-mistakes@4.26.2"
-minimal_mistakes_skin: "dirt"
-
-plugins:
- - jekyll-feed
- - jekyll-include-cache
- - jekyll-sitemap
- - jekyll-redirect-from
-
-
-# Exclude from processing.
-# The following items will not be processed, by default. Create a custom list
-# to override the default setting.
-exclude:
- - .pixi/
- - Gemfile
- - Gemfile.lock
- - node_modules
- - vendor/bundle/
- - vendor/cache/
- - vendor/gems/
- - vendor/ruby/
-
-# https://github.com/mmistakes/minimal-mistakes/blob/641ca6f3d8cd8fb0ae24e4f77b2e62a44a65e53b/_config.yml
-logo: android-chrome-512x512.png
-search: true
-search_full_content: true
-social:
- type: Organization
- name: Zarr
- links:
- - "https://bsky.app/profile/zarr.dev"
- - "https://github.com/zarr-developers"
-
-footer:
- links:
- - label: "Bluesky"
- icon: "fab fa-fw fa-bluesky"
- url: "https://bsky.app/profile/zarr.dev"
- - label: "Mastodon"
- icon: "fab fa-fw fa-mastodon"
- url: "https://fosstodon.org/@zarr"
- - label: "GitHub"
- icon: "fab fa-fw fa-github"
- url: "https://github.com/zarr-developers"
- - label: "Zulip"
- icon: "fas fa-comments"
- url: "https://ossci.zulipchat.com/"
diff --git a/_data/navigation.yml b/_data/navigation.yml
deleted file mode 100644
index 1eaf5d1..0000000
--- a/_data/navigation.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-main:
- - title: "Documentation"
- url: "https://zarr.readthedocs.io/en/stable/"
- - title: "Contribute"
- url: "https://zarr.readthedocs.io/en/stable/developers/contributing.html"
- - title: "Python Quickstart"
- url: "https://zarr.readthedocs.io/en/stable/quickstart.html"
-
-sidebar:
- - title: About
- children:
- - title: "Description"
- url: '#description'
- - title: "Applications"
- url: '#applications'
- - title: "Features"
- url: '#features'
- - title: "Sponsorship"
- url: "#sponsorship"
- - title: "Videos"
- url: "#videos"
- - title: Subpages
- children:
- - title: "Adopters"
- url: "/adopters"
- - title: "Blog"
- url: '/blog'
- - title: "Community"
- url: '/community'
- - title: "Conventions"
- url: '/conventions'
- - title: "Datasets"
- url: '/datasets'
- - title: "Implementations"
- url: '/implementations'
- - title: "Office Hours"
- url: "/office-hours"
- - title: "Slides"
- url: "/slides"
- - title: "Specification"
- url: https://zarr-specs.readthedocs.io/
- - title: "ZEPs"
- url: '/zeps'
diff --git a/_includes/head/custom.html b/_includes/head/custom.html
deleted file mode 100644
index 694c641..0000000
--- a/_includes/head/custom.html
+++ /dev/null
@@ -1,351 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/_includes/masthead.html b/_includes/masthead.html
deleted file mode 100644
index 23b956c..0000000
--- a/_includes/masthead.html
+++ /dev/null
@@ -1,40 +0,0 @@
-{% capture logo_path %}{{ site.logo }}{% endcapture %}
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/_posts/2019-05-02-zarr-2.3-release.md b/_posts/2019-05-02-zarr-2.3-release.md
deleted file mode 100644
index d7c62cf..0000000
--- a/_posts/2019-05-02-zarr-2.3-release.md
+++ /dev/null
@@ -1,232 +0,0 @@
----
-title: "Zarr Python 2.3 release"
-date: 2019-05-23
-categories: zarr python release
-
-layout: home
-author_profile: false
-sidebar:
- title: "Content"
- nav: sidebar
----
-
-Recently we released version 2.3 of the [Python Zarr
-package](https://zarr.readthedocs.io/en/stable/), which implements the
-Zarr protocol for storing N-dimensional typed arrays, and is designed
-for use in distributed and parallel computing. This post provides an
-overview of new features in this release, and some information about
-future directions for Zarr.
-
-## New storage options for distributed and cloud computing
-
-A key feature of the Zarr protocol is that the underlying storage
-system is decoupled from other components via a simple key/value
-interface. In Python, this interface corresponds to the
-[`MutableMapping`
-interface](https://docs.python.org/3/glossary.html#term-mapping),
-which is the interface that Python
-[`dict`](https://docs.python.org/3/library/stdtypes.html#dict)
-implements. I.e., anything `dict`-like can be used to store Zarr
-data. The simplicity of this interface means it is relatively
-straightforward to add support for a range of different storage
-systems. The 2.3 release adds support for storage using [SQLite](
-https://zarr.readthedocs.io/en/stable/api/storage.html#zarr.storage.SQLiteStore
-), [Redis](
-https://zarr.readthedocs.io/en/stable/api/storage.html#zarr.storage.RedisStore
-), [MongoDB](
-https://zarr.readthedocs.io/en/stable/api/storage.html#zarr.storage.MongoDBStore
-) and [Azure Blob Storage](
-https://zarr.readthedocs.io/en/stable/api/storage.html#zarr.storage.ABSStore
-).
-
-For example, here's code that creates an array using MongoDB:
-
-{% highlight python %}
-import zarr
-store = zarr.MongoDBStore('localhost')
-root = zarr.group(store=store, overwrite=True)
-foo = bar.create_group('foo')
-bar = foo.create_dataset('bar', shape=(10000, 1000), chunks=(1000, 100))
-bar[:] = 42
-store.close()
-{% endhighlight %}
-
-To do the same thing but storing the data in the cloud via Azure
-Blob Storage, replace the instantiation of the `store` object with:
-
-{% highlight python %}
-store = zarr.ABSStore(container='test', account_name='foo', account_key='bar')
-{% endhighlight %}
-
-Support for other cloud object storage storage services was already
-available via other packages, with Amazon S3 supported via the [s3fs](
-http://s3fs.readthedocs.io/en/latest/ ) package, and Google Cloud
-Storage supported via the [gcsfs](
-https://gcsfs.readthedocs.io/en/latest/ ) package. Further notes on
-using cloud storage are available from the [Zarr
-tutorial](https://zarr.readthedocs.io/en/stable/tutorial.html#distributed-cloud-storage).
-
-The attraction of cloud storage is that total I/O bandwidth scales
-linearly with the size of a computing cluster, so there are no
-technical limits to the size of the data or computation you can scale
-up to. Here's a slide from a recent presentation by [Ryan
-Abernathey](https://github.com/rabernat) showing how I/O scales when
-using Zarr over Google Cloud Storage:
-
-
-
-## Optimisations for cloud storage: consolidated metadata
-
-One issue with using cloud object storage is that, although total I/O
-throughput can be high, the latency involved in each request to read
-the contents of an object can be >100 ms, even when reading from
-compute nodes within the same data centre. This latency can add up
-when reading metadata from many arrays, because in Zarr each array has
-its own metadata stored in a separate object.
-
-To work around this, the 2.3 release adds an experimental feature to
-consolidate metadata for all arrays and groups within a hierarchy into
-a single object, which can be read once via a single request. Although
-this is not suitable for rapidly changing datasets, it can be good for
-large datasets which are relatively static.
-
-To use this feature, two new convenience functions have been
-added. The
-[`consolidate_metadata()`](https://zarr.readthedocs.io/en/stable/api/convenience.html#zarr.convenience.consolidate_metadata)
-function performs the initial consolidation, reading all metadata and
-combining them into a single object. Once you have done that and
-deployed the data to a cloud object store, the
-[`open_consolidated()`](https://zarr.readthedocs.io/en/stable/api/convenience.html#zarr.convenience.open_consolidated)
-function can be used to read data, making use of the consolidated
-metadata.
-
-Support for the new consolidated metadata feature is also now
-available via
-[xarray](http://xarray.pydata.org/en/stable/generated/xarray.open_zarr.html)
-and
-[intake-xarray](https://intake-xarray.readthedocs.io/en/latest/index.html)
-(see [this blog
-post](https://www.anaconda.com/intake-taking-the-pain-out-of-data-access/)
-for an introduction to intake), and many of the datasets in [Pangeo's
-cloud data catalog](https://pangeo-data.github.io/pangeo-datastore/)
-use Zarr with consolidated metadata.
-
-Here's an example of how to open a Zarr dataset from Pangeo's data
-catalog via intake:
-
-{% highlight python %}
-import intake
-cat_url = 'https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/master.yaml'
-cat = intake.Catalog(cat_url)
-ds = cat.atmosphere.gmet_v1.to_dask()
-{% endhighlight %}
-
-...and [here's the underlying catalog
-entry](https://github.com/pangeo-data/pangeo-datastore/blob/aa3f12bcc3be9584c1a9071235874c9d6af94a4e/intake-catalogs/atmosphere.yaml#L6).
-
-
-## Compatibility with N5
-
-Around the same time that development on Zarr was getting started, a
-separate team led by [Stephan Saafeld](https://github.com/axtimwalde)
-at the Janelia research campus was experiencing similar challenges
-storing and computing with large amounts of neural imaging data, and
-developed a software library called
-[N5](https://github.com/saalfeldlab/n5). N5 is implemented in Java but
-is very similar to Zarr in the approach it takes to storing both
-metadata and data chunks, and to decoupling the storage backend to
-enable efficient use of cloud storage.
-
-There is a lot of commonality between Zarr and N5 and we are working
-jointly to bring the two approaches together. As a first experimental
-step towards that goal, the Zarr 2.3 release includes an [N5 storage
-adapter](https://zarr.readthedocs.io/en/stable/api/n5.html#zarr.n5.N5Store)
-which allows reading and writing of data on disk in the N5
-format.
-
-
-## Support for the buffer protocol
-
-Zarr is intended to work efficiently across a range of different
-storage systems with different latencies and bandwidth, from cloud
-object stores to local disk and memory. In many of these settings,
-making efficient use of local memory, and avoiding memory copies
-wherever possible, can make a substantial difference to
-performance. This is particularly true within the
-[Numcodecs](http://numcodecs.rtfd.io) package, which is a companion to
-Zarr and provides implementations of compression and filter codecs
-such as Blosc and Zstandard. A key aspect of achieving fewer memory
-copies has been to leverage the Python buffer protocol.
-
-The [Python buffer
-protocol](https://docs.python.org/3/c-api/buffer.html) is a
-specification for how to share large blocks of memory between
-different libraries without copying. This protocol has evolved over
-time from its original introduction in Python 2 and later revamped
-implementation added in Python 3 (with backports to Python 2.6 and
-2.7). Due to the changes in its behavior from Python 2 to Python 3 and
-what objects supported which implementation of the buffer protocol, it
-was a bit challenging to leverage effectively in Zarr.
-
-Thanks to some under-the-hood changes in Zarr 2.3 and Numcodecs 0.6,
-the buffer protocol is now cleanly supported for Python 2/3 in both
-libraries when working with data. In addition to improved memory
-handling and performance, this should make it easier for users
-developing their own stores, compressors, and filters to use with
-Zarr. Also it has cutdown on the amount of code specialized for
-handling different Python versions.
-
-
-## Future developments
-
-There is a growing community of interest around new approaches to
-storage of array-like data, particularly in the cloud. For example,
-[Theo McCaie](https://github.com/tam203) from the UK Met Office
-Informatics Lab recently wrote a series of blog posts about the
-challenges involved in [storing 200TB of "high momentum" weather model
-data every
-day](https://medium.com/informatics-lab/creating-a-data-format-for-high-momentum-datasets-a394fa48b671). This
-is an exciting space to be working in and we'd like to do what we can
-to build connections and share knowledge and ideas between
-communities. We've started a [regular
-teleconference](https://github.com/zarr-developers/zarr/issues/315)
-which is open to anyone to join, and there is a new [gitter
-channel](https://gitter.im/zarr-developers/community) for general
-discussion.
-
-The main focus of our conversations so far has been setting up work
-towards development of a new set of specifications that support the
-features of both Zarr and N5, and provide a platform for exploration
-and development of new features, while also identifying a minimal core
-protocol that can be implemented in a range of different programming
-languages. It is still relatively early days and there are lots of
-open questions to work through, both on the technical side and in
-terms of how we organise and coordinate efforts. However, the
-community is very friendly and supportive, and anyone is welcome to
-participate, so if you have an interest please do consider getting
-involved.
-
-If you would like to stay in touch with or contribute to new
-developments, keep an eye on the
-[zarr](https://github.com/zarr-developers/zarr) and
-[zarr-specs](https://github.com/zarr-developers/zarr-specs) GitHub
-repositories, and please feel free to raise issues or add comments if
-you have any questions or ideas.
-
-
-## And finally... SciPy!
-
-If you're coming to SciPy this year, we're very pleased to be giving a
-talk on Zarr on [day 1 of the conference (Wednesday 10
-July)](https://www.eiseverywhere.com/ehome/381993). Several members of
-the Zarr community will be at the conference, and there are sprints
-going on after the conference in a number of related areas, including
-an Xarray sprint on the Saturday. Please do say hi or [drop us a
-comment on this
-issue](https://github.com/zarr-developers/zarr/issues/396) if you'd
-like to connect and discuss anything.
-
-----
-
-Blog post written by [Alistair Miles](https://github.com/alimanfoo)
-and [John Kirkham](https://github.com/jakirkham).
diff --git a/_posts/2019-06-19-zarr-v3-update.md b/_posts/2019-06-19-zarr-v3-update.md
deleted file mode 100644
index 53f2122..0000000
--- a/_posts/2019-06-19-zarr-v3-update.md
+++ /dev/null
@@ -1,76 +0,0 @@
----
-title: "Zarr protocol v3 design update"
-date: 2019-06-19
-categories: zarr specs
-
-layout: home
-author_profile: false
-sidebar:
- title: "Content"
- nav: sidebar
----
-
-Today I put together some [slides summarising the current state of
-exploratory work on the Zarr v3 protocol
-spec](https://zarr-developers.github.io/slides/v3-update-20190619.html). The
-purpose of this blog post is to share those slides more widely, and to
-provide some context explaining why work has started on a v3 spec.
-
-## Why work on a v3 spec?
-
-The [current (v2) Zarr
-spec](https://zarr.readthedocs.io/en/stable/spec/v2.html) is
-implemented in a number of software libraries, and is a stable and
-robust protocol that is used in production in a number of different
-scientific communities. If you need to store and compute in parallel
-against large array-like data, it's a good solution. So why start
-thinking about a new protocol version?
-
-### Language-agnostic
-
-One reason is that the v2 protocol is somewhat Python-centric, and
-includes some features which are not straightforward to implement in
-other languages. This has meant that implementations do not all
-support the same feature set. It would be good to have a minimal v3
-protocol spec that could be fully implemented in any language, so all
-implementations have parity around a core feature set.
-
-### Unifying Zarr and N5
-
-Another reason is that we would like to merge development efforts
-between the Zarr and N5 communities, and so a goal for the v3 spec is
-to unify the two approaches and provide a common implementation
-target.
-
-### Extensibility
-
-A third reason is that a number of different groups have started
-experimenting and extending the Zarr protocol in interesting ways, but
-it's not always clear how to extend the v2 protocol to support new
-features. It would be good if the v3 spec provided a variety of clear
-extension points and extension mechanisms.
-
-### Cloud storage
-
-Finally, while the v2 spec can be used very effectively with
-distributed storage systems like Amazon S3 or Google Cloud Storage,
-there is room for improvement, particularly regarding how metadata is
-stored and organised.
-
-## Zarr v3 design update
-
-I you are interested in knowing more about the current status of work
-on the v3 spec, please take a look at the [v3 design update
-slides](https://zarr-developers.github.io/slides/v3-update-20190619.html). The
-slides use reveal.js and have both horizontal and vertical
-navigation - if you haven't seen that before, then navigate downwards
-first wherever you can, before navigating to the right.
-
-As I mention in the slides, the current v3 spec is just a straw man,
-meant to illustrate some ideas and potential solutions, but everything
-is up for discussion. So if you have any comments or ideas, please do
-get in touch, anyone is welcome to participate.
-
-----
-
-Blog post written by [Alistair Miles](https://github.com/alimanfoo).
diff --git a/about/index.html b/about/index.html
deleted file mode 100644
index f0ce6c0..0000000
--- a/about/index.html
+++ /dev/null
@@ -1,5 +0,0 @@
-
-
-
-
-
diff --git a/adopters/index.md b/adopters/index.md
index 98ee847..796c3e1 100644
--- a/adopters/index.md
+++ b/adopters/index.md
@@ -1,104 +1,135 @@
---
-layout: single
-author_profile: false
title: Zarr Adopters
-sidebar:
- title: "Content"
- nav: sidebar
---
-
💡 If you're using Zarr in any way and would like to be added on this page, please drop your logo and blurb here.
+💡 If you're using Zarr in any way and would like to be added on this page, please drop your logo and blurb [here](https://github.com/zarr-developers/community/issues/60).
-
Thanks to the amazing community, Zarr is widely adopted and used by these groups. Here are the logos (in alphabetical order):
+Thanks to the amazing community, Zarr is widely adopted and used by these groups:
-
+
-→ Zarr is used by CarbonPlan as a storage format for analysis and visualization of climate data.
+
----
+
-
+Zarr is used by [CarbonPlan](https://carbonplan.org/) as a storage format for analysis and visualization of climate data.
----
+
-
+
----
+
-
+Zarr is currently used by DANDI for lightsheet microscopy and some ex vivo MRI datasets, and is being considered as a backend for Neurodata Without Borders (NWB) neurophysiology datasets.
-→ Zarr is currently used by DANDI for lightsheet microscopy and some ex vivo MRI datasets, and is being considered as a backend for Neurodata Without Borders (NWB) neurophysiology datasets.
+
----
+
-
+
-→ ESA is introducing Zarr for the management of the Copernicus Sentinel data and the future Copernicus Expansion missions under the name of EOPF (Earth observation processing Framework). An initial set of data can be accessed through the Sample Service.
+ESA is introducing Zarr for the management of the Copernicus Sentinel data and the future Copernicus Expansion missions under the name of [EOPF (Earth observation processing Framework)](https://eopf.copernicus.eu/). An initial set of data can be accessed through the [Sample Service](https://zarr.eopf.copernicus.eu/).
----
+
-
+
-→ Google Research has released a number of open source projects for working with Zarr data (including Neuroglancer, TensorStore, Xarray-Beam, and Xarray-TensorStore) and uses Zarr for distributing large-scale weather datasets.
+
----
+Google Research has released a number of open source projects for working with Zarr data (including [Neuroglancer](https://github.com/google/neuroglancer/), [TensorStore](https://github.com/google/tensorstore/), [Xarray-Beam](https://github.com/google/xarray-beam/), and [Xarray-TensorStore](https://github.com/google/xarray-tensorstore/)) and uses Zarr for distributing [large-scale weather datasets](https://github.com/google-research/arco-era5/).
-
+
-→ Zarr is used extensively within Janelia Research Campus for efficiently storing and accessing large imaging datasets
+
----
+
-
+Zarr is used extensively within Janelia Research Campus for efficiently storing and accessing large imaging datasets.
-→ Zarr is used by the LEAP Project (Learning the Earth with Artificial Intelligence and Physics) at Columbia University as the data format for the LEAP-Pangeo data library. LEAP researchers use Zarr to perform data analytics and machine learning for climate modeling.
+
----
+
-
+
-→ Zarr is used by the Microsoft Planetary Computer as a cloud-native storage format for chunked, N-dimensional arrays of geospatial data.
+Zarr is used by the [LEAP Project](https://leap.columbia.edu/) (Learning the Earth with Artificial Intelligence and Physics) at Columbia University as the data format for the LEAP-Pangeo data library. LEAP researchers use Zarr to perform data analytics and machine learning for climate modeling.
----
+
-
+
-→ Zarr is used by NASA as an analysis-ready data store for chunked, N-dimensional arrays of geospatial data.
+
----
+Zarr is used by the [Microsoft Planetary Computer](https://planetarycomputer.microsoft.com/) as a cloud-native storage format for chunked, N-dimensional arrays of geospatial data.
-
+
-
+
-→ Zarr is used extensively within the Pangeo Project as a cloud native storage format for ocean, weather, climate, and geospatial data.
+The [Open Microscopy Environment (OME)](https://www.openmicroscopy.org/) community is developing a format, OME-Zarr, using Zarr to store large, n-dimensional biological images as part of the [Next-generation file format (NGFF)](https://ngff.openmicroscopy.org/) effort.
----
+OME logo used by [permission](https://www.openmicroscopy.org/artwork/), trademarked [Glencoe Software, Inc.](https://www.glencoesoftware.com/)
-
+
-→ scalable minds uses Zarr to store and process peta-scale 3D images of brain tissue for Connectomics analyses.
+
----
+
-
+Zarr is used extensively within the [Pangeo Project](https://pangeo.io/) as a cloud native storage format for ocean, weather, climate, and geospatial data.
-→ Unidata is developing its netCDF libaries to support Zarr datasets amongst the atmospheric and geoscientific community.
+
----
+
-
+
-→ WEBKNOSSOS is a web-based platform for visualizing, collaboratively annotating and sharing large 3D images. Zarr is used for accessing remotely stored images as well as securely streaming annotation data to client applications.
+scalable minds uses Zarr to store and process peta-scale 3D images of brain tissue for Connectomics analyses.
----
+
+
+
+
+
+
+[Unidata](https://www.unidata.ucar.edu/) is developing its netCDF libraries to support Zarr datasets amongst the atmospheric and geoscientific community.
+
+
+
+
+
+
+
+WEBKNOSSOS is a web-based platform for visualizing, collaboratively annotating and sharing large 3D images. Zarr is used for accessing remotely stored images as well as securely streaming annotation data to client applications.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/browserconfig.xml b/browserconfig.xml
deleted file mode 100644
index 98e3ee9..0000000
--- a/browserconfig.xml
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
-
-
- #ffffff
-
-
-
diff --git a/community/index.md b/community/index.md
index 552d685..b1724bf 100644
--- a/community/index.md
+++ b/community/index.md
@@ -1,10 +1,5 @@
---
-layout: single
-author_profile: false
title: Zarr Community
-sidebar:
- title: "Content"
- nav: sidebar
---
Zarr is a community-driven open-source data format specification developed and maintained by diverse [contributors](https://github.com/orgs/zarr-developers/teams). The Zarr contributors aim to maintain an open, equal, diverse, and inclusive community. Please read the [Zarr Code of Conduct](https://github.com/zarr-developers/.github/blob/main/CODE_OF_CONDUCT.md) for the appropriate way to interact with the community and help each other.
diff --git a/conventions/index.md b/conventions/index.md
index 7946eab..9714f94 100644
--- a/conventions/index.md
+++ b/conventions/index.md
@@ -1,28 +1,16 @@
---
-layout: single
-author_profile: false
title: Zarr Conventions
-sidebar:
- title: "Content"
- nav: sidebar
---
-
-The Zarr storage format is used in various domains and has active communities
-for each field. These communities have established several conventions on top
-of Zarr.
+The Zarr storage format is used in various domains and has active communities for each field. These communities have established several conventions on top of Zarr.
-Here are some of the conventions that are currently in use:
+Here are some of the conventions that are currently in use:
-
+- [Anndata](https://anndata.readthedocs.io/en/latest/)
+- [BDZ](https://github.com/openssbd/bdz)
+- [GeoZarr](https://github.com/zarr-developers/geozarr-spec)
+- [NCZarr](https://docs.unidata.ucar.edu/nug/current/nczarr_head.html)
+- [OME-Zarr](https://github.com/ome/ome-zarr-py)
+- [Xarray](https://docs.xarray.dev/en/stable/internals/zarr-encoding-spec.html)
-If you're working on/know a new convention and want to add it to the list, feel free
-to send a PR to the website repository.
-
+If you're working on or know of a new convention and want to add it to the list, feel free to send a PR to the website [repository](https://github.com/zarr-developers/zarr-developers.github.io/).
diff --git a/datasets/index.md b/datasets/index.md
index a367793..8a2bf6c 100644
--- a/datasets/index.md
+++ b/datasets/index.md
@@ -1,17 +1,8 @@
---
-layout: single
-author_profile: false
title: Zarr Datasets
-sidebar:
- title: "Content"
- nav: sidebar
---
-
-Zarr has been adopted by a variety of
-research groups, universities, organisations, and open-source communities.
-Below is a list of a few examples of public datasets.
-
+Zarr has been [adopted](https://zarr.dev/adopters/) by a variety of research groups, universities, organisations, and open-source communities. Below is a list of a few examples of public datasets.
| Dataset | Hosting | Size |
| ----------------------------------------------------------------------------------------------------------------- | --------------------- | ----- |
@@ -23,13 +14,8 @@ Below is a list of a few examples of public datasets.
| [National Water Model](https://discourse.pangeo.io/t/the-national-water-model-reanalysis-zarr-dataset-on-aws/1449)| Amazon AWS | 5 TB |
| [webKnossos](https://zarr.webknossos.org/) | scalableminds GmbH | 70 TB |
-
- If you think your dataset would be a useful example to add to the list,
-feel free to send a PR to the website repository.
+If you think your dataset would be a useful example to add to the list, feel free to send a PR to the website [repository](https://github.com/zarr-developers/zarr-developers.github.io/).
-Please ensure the dataset you're adding to the list is public, free to use,
-documented, and illustrate the application of Zarr within a specific domain.
+Please ensure the dataset you're adding to the list is public, free to use, documented, and illustrates the application of Zarr within a specific domain.
-Please only include URLs that you expect to be stable and maintained in the
-foreseeable future.
-
+Please only include URLs that you expect to be stable and maintained in the foreseeable future.
diff --git a/implementations/index.md b/implementations/index.md
index 4f0cf62..8c51007 100644
--- a/implementations/index.md
+++ b/implementations/index.md
@@ -1,20 +1,10 @@
---
-layout: single
-author_profile: false
title: Zarr Implementations
-sidebar:
- title: "Content"
- nav: sidebar
---
-
-Zarr is a data storage format based on an open-source specification, making
-implementations across several languages possible. It is used in various
-domains, including geospatial, bio-imaging, genomics, data science, and HPC. 🌏🔬🧬
+Zarr is a data storage format based on an open-source [specification](https://zarr-specs.readthedocs.io/), making implementations across several languages possible. It is used in various domains, including geospatial, bio-imaging, genomics, data science, and HPC. 🌏🔬🧬
-Zarr version 2 and 3 implementations are listed (in alphabetical order per language) as follows:
-
+Zarr version 2 and 3 implementations are listed (in alphabetical order per language) as follows:
| Language | Implementation | V2| V3| Latest Release/Commit |
|------------------------|------------------------|---|---|------------------------------|
@@ -100,9 +90,6 @@ Zarr version 2 and 3 implementations are listed (in alphabetical order per langu
[ExZarr-lu]: https://img.shields.io/github/last-commit/saalfeldlab/n5-zarr
-
-→ Feel free to add any missing implementations by sending a PR to the website repository. 🤝🏻
+→ Feel free to add any missing implementations by sending a PR to the website [repository](https://github.com/zarr-developers/zarr-developers.github.io/). 🤝🏻
-→ Get involved in various Zarr implementations by fixing bugs, resolving issues, improving documentation, or contributing to the codebase.
-If you've been doing any of these activities recently, we invite you to join our community meetings and share your work with us. We'd be delighted to showcase your efforts. 💪🏻
-
+→ Get involved in various Zarr implementations by fixing bugs, resolving issues, improving documentation, or contributing to the codebase. If you've been doing any of these activities recently, we invite you to join our [community meetings](https://zarr.dev/community-calls/) and share your work with us. We'd be delighted to showcase your efforts. 💪🏻
diff --git a/index.md b/index.md
index 1a1b7d5..d340595 100644
--- a/index.md
+++ b/index.md
@@ -1,59 +1,65 @@
---
-# You don't need to edit this file, it's empty on purpose.
-# Edit theme's home layout instead if you wanna make some changes
-# See: https://jekyllrb.com/docs/themes/#overriding-theme-defaults
-layout: single
-author_profile: false
-sidebar:
- title: "Content"
- nav: sidebar
-excerpt: "Have beautiful data in Zarr? Show us on Bluesky!"
-header:
- overlay_image: /images/llc4320_sst.png
- overlay_filter: 0.5 # same as adding an opacity of 0.5 to a black background
- caption: "Credit: Ryan Abernathey"
+layout: home
+
+hero:
+ name: Zarr
+ text: Chunked, compressed, N-dimensional arrays
+ tagline: An open, community-driven format for storing large arrays in any key-value store, including cloud object storage.
+ image:
+ src: /images/zarr-hero.gif
+ alt: Animated ECMWF temperature forecast rendered from a Zarr store, credit Development Seed (deck.gl-raster)
actions:
- - label: "Share here!"
- url: "https://bsky.app/intent/compose?text=@zarr.dev%20%23beautifuldata"
----
+ - theme: brand
+ text: Implementations
+ link: /implementations
+ - theme: alt
+ text: Specification
+ link: https://zarr-specs.readthedocs.io/
+ - theme: alt
+ text: Share on Bluesky
+ link: https://bsky.app/intent/compose?text=@zarr.dev%20%23beautifuldata
-Zarr is a community project to develop specifications and software for
-storage of large N-dimensional typed arrays, also commonly known as
-tensors. A particular focus of Zarr is to provide support for storage
-using distributed systems like cloud object stores, and to enable
-efficient I/O for parallel computing applications.
+features:
+ - title: Implementations
+ details: Zarr libraries in Python, C, C++, Rust, JavaScript, Java, Julia, and more.
+ link: /implementations
+ - title: Conventions
+ details: Domain conventions like OME-Zarr, GeoZarr, and Anndata layered on top of Zarr.
+ link: /conventions
+ - title: Community
+ details: Office hours, ZEPs, community calls, and chat. Everyone is welcome.
+ link: /community
+ - title: Datasets
+ details: Public Zarr datasets across climate, bio-imaging, genomics, and more.
+ link: /datasets
+---
## Description
-Zarr is motivated by the need for a simple, transparent, open, and
-community-driven format that supports high-throughput distributed I/O on
-different storage systems. Zarr data can be stored in any storage system that
-can be represented as a key-value store, including most commonly POSIX file
-systems and cloud object storage but also zip files as well as relational and
-document databases.
+Zarr is motivated by the need for a simple, transparent, open, and community-driven format that supports high-throughput distributed I/O on different storage systems. Zarr data can be stored in any storage system that can be represented as a key-value store, including most commonly POSIX file systems and cloud object storage but also zip files as well as relational and document databases.
See the following GitHub repositories for more information:
-* [Zarr Python](https://github.com/zarr-developers/zarr)
-* [Zarr Specs](https://github.com/zarr-developers/zarr-specs)
-* [Numcodecs](https://github.com/zarr-developers/numcodecs)
-* [Z5](https://github.com/constantinpape/z5)
-* [N5](https://github.com/saalfeldlab/n5)
-* [Zarr.jl](https://github.com/JuliaIO/Zarr.jl)
-* [ndarray.scala](https://github.com/lasersonlab/ndarray.scala)
+- [Zarr Python](https://github.com/zarr-developers/zarr)
+- [Zarr Specs](https://github.com/zarr-developers/zarr-specs)
+- [Numcodecs](https://github.com/zarr-developers/numcodecs)
+- [Z5](https://github.com/constantinpape/z5)
+- [N5](https://github.com/saalfeldlab/n5)
+- [Zarr.jl](https://github.com/JuliaIO/Zarr.jl)
+- [ndarray.scala](https://github.com/lasersonlab/ndarray.scala)
## Applications
-* Simple and fast serialization of NumPy-like arrays, accessible from languages including Python, C, C++, Rust, Javascript, Java and Julia
-* Multi-scale n-dimensional image storage, e.g. in light and electron microscopy
-* Geospatial rasters, e.g. following the NetCDF / CF metadata conventions
+- Simple and fast serialization of NumPy-like arrays, accessible from languages including Python, C, C++, Rust, Javascript, Java and Julia
+- Multi-scale n-dimensional image storage, e.g. in light and electron microscopy
+- Geospatial rasters, e.g. following the NetCDF / CF metadata conventions
## Features
-* Chunk multi-dimensional arrays along any dimension.
-* Store arrays in memory, on disk, inside a Zip file, on S3, etc.
-* Read and write arrays concurrently from multiple threads or processes.
-* Organize arrays into hierarchies via annotatable groups.
+- Chunk multi-dimensional arrays along any dimension.
+- Store arrays in memory, on disk, inside a Zip file, on S3, etc.
+- Read and write arrays concurrently from multiple threads or processes.
+- Organize arrays into hierarchies via annotatable groups.
## Sponsorship
@@ -63,13 +69,10 @@ NumFOCUS Sponsored Projects rely on the generous support of corporate sponsors,
## Videos
-[Videos](https://www.youtube.com/playlist?list=PLvkeNUPrCU04Xvcph4ErxsRkZq28Oucr7)
-of community members talking about Zarr. If you have a video you'd like us to share, let us know!
+[Videos](https://www.youtube.com/playlist?list=PLvkeNUPrCU04Xvcph4ErxsRkZq28Oucr7) of community members talking about Zarr. If you have a video you'd like us to share, let us know!
+
+
+
+## Acknowledgements
-
-
-
-
-
+Hero animation adapted from [`dynamical-zarr-ecmwf`](https://github.com/developmentseed/deck.gl-raster/tree/main/examples/dynamical-zarr-ecmwf) in [developmentseed/deck.gl-raster](https://github.com/developmentseed/deck.gl-raster)
diff --git a/mstile-150x150.png b/mstile-150x150.png
deleted file mode 100644
index d3a353c..0000000
Binary files a/mstile-150x150.png and /dev/null differ
diff --git a/numcodecs_redirects/adler32.md b/numcodecs_redirects/adler32.md
deleted file mode 100644
index 3d6195a..0000000
--- a/numcodecs_redirects/adler32.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/adler32
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/checksum32.html#adler32
----
diff --git a/numcodecs_redirects/astype.md b/numcodecs_redirects/astype.md
deleted file mode 100644
index fadef65..0000000
--- a/numcodecs_redirects/astype.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/astype
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/astype.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/bitround.md b/numcodecs_redirects/bitround.md
deleted file mode 100644
index 783c5b8..0000000
--- a/numcodecs_redirects/bitround.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/bitround
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/bitround.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/blosc.md b/numcodecs_redirects/blosc.md
deleted file mode 100644
index f6a28f3..0000000
--- a/numcodecs_redirects/blosc.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-permalink: /numcodecs/blosc
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/blosc.html
----
-
diff --git a/numcodecs_redirects/bz2.md b/numcodecs_redirects/bz2.md
deleted file mode 100644
index 79c9be7..0000000
--- a/numcodecs_redirects/bz2.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/bz2
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/bz2.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/crc32.md b/numcodecs_redirects/crc32.md
deleted file mode 100644
index 4743a96..0000000
--- a/numcodecs_redirects/crc32.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/crc32
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/checksum32.html#crc32
----
diff --git a/numcodecs_redirects/delta.md b/numcodecs_redirects/delta.md
deleted file mode 100644
index 16bf396..0000000
--- a/numcodecs_redirects/delta.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/delta
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/delta.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/fixedscaleoffset.md b/numcodecs_redirects/fixedscaleoffset.md
deleted file mode 100644
index da033db..0000000
--- a/numcodecs_redirects/fixedscaleoffset.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/fixedscaleoffset
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/fixedscaleoffset.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/fletcher32.md b/numcodecs_redirects/fletcher32.md
deleted file mode 100644
index 6a315f5..0000000
--- a/numcodecs_redirects/fletcher32.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/fletcher32
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/checksum32.html#fletcher32
----
diff --git a/numcodecs_redirects/gzip.md b/numcodecs_redirects/gzip.md
deleted file mode 100644
index d0d9e63..0000000
--- a/numcodecs_redirects/gzip.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/gzip
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/gzip.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/jenkins_lookup3.md b/numcodecs_redirects/jenkins_lookup3.md
deleted file mode 100644
index 7480225..0000000
--- a/numcodecs_redirects/jenkins_lookup3.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/jenkins_lookup3
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/checksum32.html#jenkinslookup3
----
diff --git a/numcodecs_redirects/lz4.md b/numcodecs_redirects/lz4.md
deleted file mode 100644
index 9247316..0000000
--- a/numcodecs_redirects/lz4.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/lz4
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/lz4.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/lzma.md b/numcodecs_redirects/lzma.md
deleted file mode 100644
index 87a9f72..0000000
--- a/numcodecs_redirects/lzma.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/lzma
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/lzma.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/numcodecs.md b/numcodecs_redirects/numcodecs.md
deleted file mode 100644
index 9b75cc5..0000000
--- a/numcodecs_redirects/numcodecs.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/
----
diff --git a/numcodecs_redirects/packbits.md b/numcodecs_redirects/packbits.md
deleted file mode 100644
index e818443..0000000
--- a/numcodecs_redirects/packbits.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/packbits
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/packbits.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/pcodec.md b/numcodecs_redirects/pcodec.md
deleted file mode 100644
index 8dff3b3..0000000
--- a/numcodecs_redirects/pcodec.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/pcodec
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/pcodec.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/quantize.md b/numcodecs_redirects/quantize.md
deleted file mode 100644
index bb63162..0000000
--- a/numcodecs_redirects/quantize.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/quantize
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/quantize.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/shuffle.md b/numcodecs_redirects/shuffle.md
deleted file mode 100644
index f1abb3e..0000000
--- a/numcodecs_redirects/shuffle.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/shuffle
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/shuffle.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/zfpy.md b/numcodecs_redirects/zfpy.md
deleted file mode 100644
index 1c5ac4c..0000000
--- a/numcodecs_redirects/zfpy.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/zfpy
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/zfpy.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/zlib.md b/numcodecs_redirects/zlib.md
deleted file mode 100644
index a3b52a0..0000000
--- a/numcodecs_redirects/zlib.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/zlib
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/zlib.html
----
\ No newline at end of file
diff --git a/numcodecs_redirects/zstd.md b/numcodecs_redirects/zstd.md
deleted file mode 100644
index 8cee879..0000000
--- a/numcodecs_redirects/zstd.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-permalink: /numcodecs/zstd
-redirect_to:
- - https://numcodecs.readthedocs.io/en/stable/zstd.html
----
\ No newline at end of file
diff --git a/office-hours/index.md b/office-hours/index.md
new file mode 100644
index 0000000..cc2f9dc
--- /dev/null
+++ b/office-hours/index.md
@@ -0,0 +1,25 @@
+---
+title: Zarr Office Hours
+---
+
+
+
+
+
+We're hosting office hours on Wednesdays every two weeks. Please join us if you have questions about Zarr and want to learn more about the storage format. We'll discuss the specification, the implementations, what's new in the Zarr ecosystem, how you can get involved in the community and much more.
+
+Office hours are a great place to go if you want to start using Zarr and have questions about whether it suits your data storage needs. We'll have a few items on the agenda to kickstart the meeting, but the overall agenda and structure of the office hours will be shaped according to the attendees' and community's needs.
+
+Please see the Zarr community calendar for exact timings:
+
+
+
+Download the [.ics file](https://calendar.google.com/calendar/ical/c_ba2k79i3u0lkf49vo0jre27j14%40group.calendar.google.com/public/basic.ics) and add it to your calendar so you won't miss any of our meetings!
diff --git a/office_hours/index.md b/office_hours/index.md
deleted file mode 100644
index a1616ea..0000000
--- a/office_hours/index.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-layout: single
-author_profile: false
-title: Zarr Office Hours
-permalink: /office-hours/
-sidebar:
- title: "Content"
- nav: sidebar
----
-
-
-
-
-We’re hosting office hours on Wednesdays every two weeks. Please join us if you have questions about Zarr and want to learn more about the storage format. We’ll discuss the specification, the implementations, what’s new in the Zarr ecosystem, how you can get involved in the community and much more.
-
-
-Office hours are a great place to go if you want to start using Zarr and have questions about whether it suits your data storage needs. We’ll have a few items on the agenda to kickstart the meeting, but the overall agenda and structure of the office hours will be shaped according to the attendees’ and community’s needs.
-
-
-Please see the Zarr community calendar for exact timings:
-
-
-
-
-```
-
-The presentation markup hierarchy needs to be `.reveal > .slides > section` where the `section` represents one slide and can be repeated indefinitely. If you place multiple `section` elements inside of another `section` they will be shown as vertical slides. The first of the vertical slides is the "root" of the others (at the top), and will be included in the horizontal sequence. For example:
-
-```html
-
-```
-
-### Markdown
-
-It's possible to write your slides using Markdown. To enable Markdown, add the `data-markdown` attribute to your `` elements and wrap the contents in a `
-
-* We use the SciPy stack for all our data analysis.
-* Data growing rapidly, need to scale our analysis.
-
-====
-
-## Motivation: Why Zarr?
-
-===
-
-### Problem statement
-
-
-
-There is some computation we want to perform.
-
-Inputs and outputs are multidimensional arrays (a.k.a. tensors).
-
-5 key features...
-
-===
-
-### (1) Larger than memory
-
-Input and/or output tensors are too big to fit comfortably in main
-memory.
-
-===
-
-### (2) Computation can be parallelised
-
-
-
-At least some part of the computation can be parallelised by
-processing data in chunks.
-
-===
-
-### E.g., embarassingly parallel
-
-
-
-===
-
-### (3) I/O is the bottleneck
-
-Computational complexity is moderate → significant amount of time is
-spent in reading and/or writing data.
-
-N.B., bottleneck may be due to (a) limited I/O bandwidth, (b) I/O is
-not parallel.
-
-===
-
-### (4) Data are compressible
-
-* Compression is a very active area of innovation.
-* Modern compressors achieve good compression ratios with very high speed.
-* Compression can increase effective I/O bandwidth, sometimes
- dramatically.
-
-===
-
-### (5) Speed matters
-
-* Rich datasets → exploratory science → interactive analysis
- → many rounds of summarise, visualise, hypothesise, model,
- test, repeat.
-
-* E.g., genome sequencing.
-
- * Now feasible to sequence genomes from 100,000s of individuals and
- compare them.
-
- * Each genome is a complete molecular blueprint for an organism
- → can investigate many different molecular pathways and
- processes.
-
- * Each genome is a history book handed down through the ages, with
- each generation making its mark → can look back in time and
- infer major demographic and evolutionary events in the history of
- populations and species.
-
-===
-
-### Problem: key features
-
-0. Inputs and outputs are tensors.
-1. Data are larger than memory.
-2. Computation can be parallelised.
-3. I/O is the bottleneck.
-4. Data are compressible.
-5. Speed matters.
-
-====
-
-## Solution
-
-1. Chunked, parallel tensor computing framework.
-2. Chunked, parallel tensor storage library.
-
-Align the chunks!
-
-===
-
-
-
-Parallel computing framework for chunked tensors.
-
-```python
-import dask.array as da
-
-a = ... # what goes here?
-x = da.from_array(a)
-y = (x - x.mean(axis=1)) / x.std(axis=1)
-u, s, v = da.linalg.svd_compressed(y, 20)
-u = u.compute()
-```
-
-* Write code using a numpy-like API.
-* Parallel execution on local workstation, HPC cluster, Kubernetes cluster, ...
-
-===
-
-
-
-* Scale up ocean / atmosphere / land / climate science.
-* Aim to handle petabyte-scale datasets on HPC and cloud platforms.
-* Using Dask.
-* Needed a tensor storage solution.
-* Interested to use cloud object stores: Amazon S3, Azure Blob Storage, Google Cloud Storage, ...
-
-====
-
-## Tensor storage: prior art
-
-===
-
-### HDF5 (h5py)
-
-* Store tensors ("datasets").
-* Divide data into regular chunks.
-* Chunks are compressed.
-* Group tensors into a hierarchy.
-* Smooth integration with NumPy...
-
-```python
-import h5py
-x = h5py.File('example.h5')['x']
-# read 1000 rows into numpy array
-y = x[:1000]
-```
-
-===
-
-### HDF5 - limitations
-
-* No thread-based parallelism.
-* Cannot do parallel writes with compression.
-* Not easy to plug in a new compressor.
-* No support for cloud object stores (but see [Kita](https://www.hdfgroup.org/solutions/hdf-kita/hdf-kita-architecture)).
-
-See also [moving away from
-HDF5](https://cyrille.rossant.net/moving-away-hdf5/) by Cyrille Rossant.
-
-===
-
-### bcolz
-
-
-
-* Developed by [Francesc Alted](https://github.com/FrancescAlted).
-* Chunked storage, primarily intended for storing 1D arrays (table columns), but can also store tensors.
-* Implementation is simple (in a good way).
-* Data format on disk is simple - one file for metadata, one file for each chunk.
-* Showcase for the [Blosc compressor](http://blosc.org/).
-
-===
-
-### bcolz - limitations
-
-* Chunking in 1 dimension only.
-* No support for cloud object stores.
-
-====
-
-## How hard could it be ...
-
-... to implement a chunked storage library for tensor data that
-supported parallel reads, parallel writes, was easy to plug in new
-compressors, and easy to plug in different storage systems like cloud
-object stores?
-
-===
-
-### ``````
-
-
-3 years, 1,107 commits, 39 releases, 259 issues, 165 PRs, and at least 2 babies later ...
-
-====
-
-### Zarr Python
-
-```bash
-$ pip install zarr
-```
-
-```bash
-$ conda install -c conda-forge zarr
-```
-
-```python
->>> import zarr
->>> zarr.__version__
-'2.3.2'
-```
-
-===
-
-### Conceptual model based on HDF5
-
-* Multiple arrays (a.k.a. datasets) can be created and organised into
- a hierarchy of groups.
-
-* Each array is divided into regular shaped chunks.
-
-* Each chunk is compressed before storage.
-
-===
-
-### Creating a hierarchy
-
-```python
->>> store = zarr.DirectoryStore('example.zarr')
->>> root = zarr.group(store)
->>> root
-
-```
-
-Using DirectoryStore the data will be stored in a directory on the
-local file system.
-
-===
-
-### Creating an array
-
-```python
->>> hello = root.zeros('hello',
-... shape=(10000, 10000),
-... chunks=(1000, 1000),
-... dtype='>> hello
-
-```
-
-* Creates a 2-dimensional array of 32-bit integers with 10,000 rows
-and 10,000 columns.
-
-* Divided into chunks where each chunk has 1,000 rows and 1,000 columns.
-
-* There will be 100 chunks in total, arranged in a 10x10 grid.
-
-===
-
-### Creating an array (h5py-style API)
-
-```python
->>> hello = root.create_dataset('hello',
-... shape=(10000, 10000),
-... chunks=(1000, 1000),
-... dtype='>> hello
-
-```
-
-===
-
-### Creating an array (big)
-
-```python
->>> big = root.zeros('big',
-... shape=(100_000_000, 100_000_000),
-... chunks=(10_000, 10_000),
-... dtype='i4')
->>> big
-
-```
-
-===
-
-### Creating an array (big)
-
-```python
->>> big.info
-Name : /big
-Type : zarr.core.Array
-Data type : int32
-Shape : (100000000, 100000000)
-Chunk shape : (10000, 10000)
-Order : C
-Read-only : False
-Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
-Store type : zarr.storage.DirectoryStore
-No. bytes : 40000000000000000 (35.5P)
-No. bytes stored : 355
-Storage ratio : 112676056338028.2
-Chunks initialized : 0/100000000
-```
-
-* That's a 35 petabyte array.
-* N.B., chunks are initialized on write.
-
-===
-
-### Writing data into an array
-
-```python
->>> big[0, 0:20000] = np.arange(20000)
->>> big[0:20000, 0] = np.arange(20000)
-```
-
-* Same API as writing into numpy array or h5py dataset.
-
-===
-
-### Reading data from an array
-
-```python
->>> big[0:1000, 0:1000]
-array([[ 0, 1, 2, ..., 997, 998, 999],
- [ 1, 0, 0, ..., 0, 0, 0],
- [ 2, 0, 0, ..., 0, 0, 0],
- ...,
- [997, 0, 0, ..., 0, 0, 0],
- [998, 0, 0, ..., 0, 0, 0],
- [999, 0, 0, ..., 0, 0, 0]], dtype=int32)
-```
-
-* Same API as slicing a numpy array or reading from an h5py dataset.
-
-===
-
-### Chunks are initialized on write
-
-```python
->>> big.info
-Name : /big
-Type : zarr.core.Array
-Data type : int32
-Shape : (100000000, 100000000)
-Chunk shape : (10000, 10000)
-Order : C
-Read-only : False
-Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
-Store type : zarr.storage.DirectoryStore
-No. bytes : 40000000000000000 (35.5P)
-No. bytes stored : 5171386 (4.9M)
-Storage ratio : 7734870303.6
-Chunks initialized : 3/100000000
-```
-
-===
-
-### Files on disk
-
-```bash
-$ tree -a example.zarr
-example.zarr
-├── big
-│ ├── 0.0
-│ ├── 0.1
-│ ├── 1.0
-│ └── .zarray
-├── hello
-│ └── .zarray
-└── .zgroup
-
-2 directories, 6 files
-```
-
-===
-
-### Array metadata
-
-```bash
-$ cat example.zarr/big/.zarray
-{
- "chunks": [
- 10000,
- 10000
- ],
- "compressor": {
- "blocksize": 0,
- "clevel": 5,
- "cname": "lz4",
- "id": "blosc",
- "shuffle": 1
- },
- "dtype": ">> big[-1000:, -1000:]
-array([[0, 0, 0, ..., 0, 0, 0],
- [0, 0, 0, ..., 0, 0, 0],
- [0, 0, 0, ..., 0, 0, 0],
- ...,
- [0, 0, 0, ..., 0, 0, 0],
- [0, 0, 0, ..., 0, 0, 0],
- [0, 0, 0, ..., 0, 0, 0]], dtype=int32)
-```
-
-* No data on disk, fill value is used (in this case zero).
-
-===
-
-### Reading the whole array
-
-```python
->>> big[:]
-MemoryError
-```
-
-* Read the whole array into memory (if you can!)
-
-====
-
-## [Pluggable storage](https://zarr.readthedocs.io/en/stable/tutorial.html#storage-alternatives)
-
-`zarr.DirectoryStore`, `zarr.ZipStore`, `zarr.DBMStore`,
-`zarr.LMDBStore`, `zarr.SQLiteStore`, `zarr.MongoDBStore`,
-`zarr.RedisStore`, `zarr.ABSStore`, `s3fs.S3Map`, `gcsfs.GCSMap`, ...
-
-===
-
-### DirectoryStore
-
-```python
->>> store = zarr.DirectoryStore('example.zarr')
->>> root = zarr.group(store)
->>> big = root['big']
->>> big
-
-```
-
-===
-
-### DirectoryStore (reminder)
-
-```bash
-$ tree -a example.zarr
-example.zarr
-├── big
-│ ├── 0.0
-│ ├── 0.1
-│ ├── 1.0
-│ └── .zarray
-├── hello
-│ └── .zarray
-└── .zgroup
-
-2 directories, 6 files
-```
-
-===
-
-### ZipStore
-
-```bash
-$ cd example.zarr && zip -r0 ../example.zip ./*
-```
-
-```python
->>> store = zarr.ZipStore('example.zip')
->>> root = zarr.group(store)
->>> big = root['big']
->>> big
-
-```
-
-===
-
-### Google cloud storage (via [gcsfs](https://github.com/dask/gcsfs))
-
-```bash
-$ gsutil config
-$ gsutil rsync -ru example.zarr/ gs://zarr-demo/example.zarr/
-```
-
-```python
->>> import gcsfs
->>> gcs = gcsfs.GCSFileSystem(token='anon', access='read_only')
->>> store = gcsfs.GCSMap('zarr-demo/example.zarr', gcs=gcs, check=False)
->>> root = zarr.group(store)
->>> big = root['big']
->>> big
-
-```
-
-===
-
-### Google cloud storage
-
-
-
-===
-
-
-
-===
-
-### Store interface
-
-* Any storage system can be used with Zarr if it can provide a
- key/value interface.
-
- * Keys are strings, values are bytes.
-
-* In Python, we use the MutableMapping interface.
-
- * `__getitem__`
- * `__setitem__`
- * `__iter__`
-
-* I.e., anything dict-like can be used as a Zarr store.
-
-===
-
-### E.g., ZipStore implementation
-
-```python
-class ZipStore(MutableMapping):
-
- def __init__(self, path, ...):
- self.zf = zipfile.ZipFile(path, ...)
-
- def __getitem__(self, key):
- with self.zf.open(key) as f:
- return f.read()
-
- def __setitem__(self, key, value):
- self.zf.writestr(key, value)
-
- def __iter__(self):
- for key in self.zf.namelist():
- yield key
-```
-
-(Actual implementation is slightly more complicated, but this is the essence.)
-
-====
-
-## Parallel computing with Zarr
-
-* A Zarr array can have multiple concurrent readers*.
-* A Zarr array can have multiple concurrent writers*.
-* Both multi-thread and multi-process parallelism are supported.
-* GIL is released during critical sections (compression and decompression).
-
-* Depending on the store.
-
-===
-
-### Dask + Zarr
-
-```python
-import dask.array as da
-import zarr
-
-# set up input
-store = ... # some Zarr store
-root = zarr.group(store)
-big = root['big']
-big = da.from_array(big)
-
-# define computation
-output = big * 42 + ...
-
-# if output is small, compute to memory
-o = output.compute()
-
-# if output is big, compute and write directly to Zarr
-da.to_zarr(output, store, component='output')
-```
-
-See docs for
-[`da.from_array()`](https://docs.dask.org/en/latest/array-api.html#dask.array.from_array),
-[`da.from_zarr()`](https://docs.dask.org/en/latest/array-api.html#dask.array.from_zarr),
-[`da.to_zarr()`](https://docs.dask.org/en/latest/array-api.html#dask.array.to_zarr),
-[`da.store()`](https://docs.dask.org/en/latest/array-api.html#dask.array.store).
-
-===
-
-### Write locks?
-
-
-
-* If each writer is writing to a different region of an array, and all
- writes are **aligned with chunk boundaries**, then locking is **not
- required**.
-
-===
-
-### Write locks?
-
-
-
-* If each writer is writing to a different region of an array, and
- writes are **not aligned** with chunk boundaries, then locking **is
- required** to avoid contention and/or data loss.
-
-===
-
-### Write locks?
-
-* Zarr does support chunk-level write locks for either multi-thread or
- multi-process writes.
-* But generally easier and better to align writes with chunk
- boundaries where possible.
-
-See Zarr tutorial for [further info on synchronisation](https://zarr.readthedocs.io/en/stable/tutorial.html#parallel-computing-and-synchronization).
-
-====
-
-## Pluggable compressors
-
-===
-
-### Compressor benchmark (genomic data)
-
-
-
-Alistair Miles ([@alimanfoo](https://github.com/alimanfoo))
-
-19 June 2019
-
-====
-
-### Current status
-
-* Development is ongoing via the core-protocol-v3.0-dev branch in the
- [zarr-specs repo](https://github.com/zarr-developers/zarr-specs),
- follow
- [PR#16](https://github.com/zarr-developers/zarr-specs/pull/16) for
- current status.
-
-* Rendered docs from this branch can be [viewed on
- RTFD](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/).
-
-* This is still a straw man, everything is up for discussion.
-
-====
-
-### Design principles
-
-1. Hackable
-2. Parallel
-3. Distributed
-
-===
-
-### Design principles - hackable
-
-* Easy to implement.
-* Easy to extend with new functionality.
-* Easy to inspect and manipulate metadata and data with generic tools.
-
-===
-
-### Design principles - parallel
-
-* Think "what happens if two workers do X at the same time"?
-* Avoid race conditions.
-
-===
-
-### Design principles - distributed
-
-* Accommodate eventual consistency.
-
-====
-
-### Modular spec architecture
-
-* [Core protocol spec](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html)
-* [Protocol extension specs](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/extensions.html)
-* Codec specs
-* Storage specs
-
-===
-
-### [Core protocol spec](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html)
-
-* Minimal, easy as possible to do full implementation in any language.
-
-* Aiming for intersection of N5 and Zarr v2 features.
-
-* Defines a variety of **extension points** so can also serve as a
- foundation for growth and experimentation.
-
-===
-
-### Protocol extension specs
-
-* Each protocol extension gets its own spec.
-
-* Currently the core-protocol-v3.0-dev branch also includes some
- protocol extension specs, to illustrate the concept.
-
- * E.g., [Datetime data types
- spec](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/extensions/datetime-dtypes/v1.0.html)
-
-* Ultimately these extension specs should get split out into separate
- branches, so we decouple them from the core protocol spec.
-
-* N.B., protocol extensions could also **modify** the core protocol
- (more on that later).
-
-===
-
-### Codec specs
-
-* Each codec intended for use as a compressor or filter gets it's own
- spec.
-
-* @@TODO create an example to illustrate the concept.
-
-===
-
-### Storage specs
-
-* Each concrete storage system (e.g., file system, cloud object
- storage, Zip file, LMDB, ...) gets it's own spec.
-
-* @@TODO create an example to illustrate the concept.
-
-====
-
-### Spec development process - current
-
-* Currently @alimanfoo is acting as editor for the v3.0 core protocol
- spec.
-
- * Feedback/comments/ideas/contributions welcome from anyone at any
- time
- ([PR#16](https://github.com/zarr-developers/zarr-specs/pull/16) is
- probably the best place for comments).
-
- * Still in a conceptualisation phase, no need for formal decision
- process as yet.
-
-===
-
-### Spec development process - future
-
-* Ultimately I think we'll need to define a community process for spec
- development, so that:
-
- * It's clear how others can contribute.
-
- * It's clear how decisions get made.
-
- * It's clear what stage of maturity each spec is at.
-
-* Don't have a solution for that yet, may need advice/help on best
- approach.
-
-===
-
-### Spec development process - freedoms
-
-* Hopefully the [zarr-specs
- repo](https://github.com/zarr-developers/zarr-specs) can serve as a
- focus for community spec development.
-
- * ...and the [zarr-specs RTFD
- site](https://zarr-specs.readthedocs.io) can serve as a discovery
- point for specs.
-
-* However, don't want to force all spec development down the same
- route, or force all specs to be published in the same place.
-
- * This is one reason why currently the core protocol spec makes use
- of **URIs** in metadata to refer to protocol extensions and
- codecs - allow freedom for anyone to publish their own spec.
-
-====
-
-### [Core protocol - concepts and terminology](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#concepts-and-terminology)
-
-*Hierachy. Group. Array. Name. Path. Dimension. Shape. Element. Data
-type. Chunk. Grid. Memory layout. Compressor. Codec. Metadata
-document. Store.*
-
-* Are we comfortable with this terminology and how it is defined?
-
-* Any important missing terms/concepts?
-
-====
-
-### [Core protocol - node names](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#node-names)
-
-* Each node (array or group) in a hierarchy has a name.
-
-* Node names are used to form node paths.
-
- * E.g., "/foo/bar" is a path identifying a node named "bar" whose
- parent is named "foo" whose parent is the root node.
-
-===
-
-### Node names - restrictions
-
-* Node paths are used by users to access nodes and explore/navigate a
- hierarchy.
-
-* N.B., node paths are also used to form storage keys (see later).
-
-* To try and ensure compatibility with a variety of storage systems,
- the core protocol currently states fairly heavy
- [restrictions](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#node-names)
- on node names.
-
- * Includes restriction to ASCII alpha-numeric characters, "-", "_"
- and ".".
-
-===
-
-### Node names - restrictions
-
-* Are we comfortable with the current restrictions?
-
-* Should we be aiming to support Unicode? Or is that a bridge too far
- for now?
-
- * Not sure what full implications would be, but supporting Unicode
- could make storage specs and implementations harder to develop.
-
-===
-
-### Node names - case (in)sensitivity
-
-* N.B., some file systems are case sensitive, some are not.
-
-* This can ([and
- has](https://github.com/cggh/scikit-allel/issues/215#issuecomment-434088283))
- led to bugs with the zarr v2 protocol when used with file system
- storage on different operating systems.
-
-* Hard to resolve without annoying users (e.g., force all names to
- lower case) or complicating implementation (e.g., check for
- case-insensitive name clashes).
-
-* I'm not happy with the spec's current approach to this.
-
-====
-
-### Core protocol - arrays
-
-* Arrays have dimensions, shape and data type.
-
-* Shape (length of dimensions) is finite.
-
- * But protocol extension could modify this to define behaviour for
- "[open-ended](https://github.com/zarr-developers/zarr/issues/323)"
- (i.e., infinite) dimensions.
-
-====
-
-### [Core protocol - data types](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#id5)
-
-* Boolean (single byte)
-
-* Integer (signed or unsigned; 1, 2, 4, 8 bytes; little- or
- big-endian)
-
-* Float (2, 4, 8 bytes; little- or big-endian)
-
-* Any other data type can be defined via a protocol extension
-
- * E.g., [datetime data
- types](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/extensions/datetime-dtypes/v1.0.html)
-
-===
-
-### Data types - identifiers
-
-* Each data type needs an identifier for use in metadata documents.
-
-* E.g., "bool", "i1", "u8", " Every element of the array is a member of one chunk, and there
- are no gaps or overlaps between chunks.
-
-===
-
-### Grid types
-
-* In general there are several different possible types of grid.
-
-* The core protocol defines [regular
- grids](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#regular-grids).
-
-* Other grid types could be defined via protocol extensions, e.g.,
- [non-uniform (rectilinear)
- grid](https://github.com/zarr-developers/zarr/issues/245).
-
-* Any grid type must define:
-
- * How the array space is divided into chunks.
-
- * A unique identifier for each chunk in the grid (used to form
- storage keys, see later).
-
-===
-
-### [Regular grids](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#regular-grids)
-
-* A grid type where each chunk is a (hyper)rectangle of the same shape.
-
-* I.e., essentially the grid type used in HDF5, Zarr v2 and N5,
- although different behaviours for edge chunks, see below.
-
-* Each chunk has a grid index, which is a tuple of grid coordinates.
-
- * E.g., grid index (0, 3, 7) means first chunk along first
- dimension, fourth chunk along second dimension, 8th chunk along
- third dimension.
-
-===
-
-### Regular grids - chunk identifiers
-
-* Chunk identifier is formed from grid index.
-
- * E.g., chunk at grid index (0, 3, 7) has identifier "0.3.7".
-
-* Default separator is "." but can be changed (e.g., to "/" as in N5)
- in array metadata (see later).
-
-===
-
-### Regular grids - edge chunks
-
-* All chunks have the same shape.
-
-* If the length of any array dimension is not perfectly divisible by
- the chunk length along the same dimension, the grid will overhang
- the edge of the array space.
-
-* Spec currently doesn't say any more about how to handle edge chunks,
- maybe it should?
-
- * E.g., suggest using array fill value to fill contents of edge
- chunks beyond the array space.
-
-* Other approaches (e.g., truncated edge chunks) could be defined as a
- different grid type via a protocol extension.
-
-===
-
-### Regular grids - resizing arrays
-
-* Regular grid supports growing and shrinking an array along any
- dimension.
-
- * Growing only requires change to array metadata (update array
- shape), no chunk data needs to be added or modified.
-
- * Shrinking requires change to array metadata (update array shape)
- plus delete any chunks now completely outside the array space.
-
-* Regular grid does not support [growing an array in "negative"
- direction](https://github.com/zarr-developers/zarr-specs/issues/9),
- i.e., prepending.
-
- * But could define a grid type that does support this via a protocol
- extension.
-
-====
-
-### Core protocol - chunks
-
-* What is a chunk? Logically, it's an N-dimensional typed array,
- containing data elements from a region of a Zarr array.
-
-* Chunk shape, data type and memory layout are all defined in the
- metadata for the Zarr array in which the chunk belongs.
-
- * I.e., chunks don't need their own metadata (at least when using a
- regular grid).
-
-===
-
-### Core protocol - chunks
-
-* When reading/writing chunks, expect implementations will use some
- appropriate class for managing typed memory blocks.
-
- * E.g., in Python could use
- [NumPy](https://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html),
- [XND](https://xnd.readthedocs.io/en/latest/xnd/index.html) or
- [Arrow](https://arrow.apache.org/docs/python/data.html#arrays)
- (for 1D chunks).
-
- * E.g., in C could use
- [libxnd](https://xnd.readthedocs.io/en/latest/libxnd/index.html).
-
- * E.g., in C++ could use
- [xtensor](https://xtensor.readthedocs.io/en/latest/container.html#arrays-and-tensors).
-
-===
-
-### [Chunks - memory layouts](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#id6)
-
-* Memory layout (along with data type) defines binary representation
- for a chunk.
-
-* Core protocol defines two memory layouts for chunks.
-
- * C contiguous (row-major).
- * F contiguous (column-major).
-
-* Protocol extensions could define other memory layouts.
-
-===
-
-### [Chunks - encoding](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#chunk-encoding)
-
-* Optionally, an array can be configured with a compressor.
-
-* A compressor is a codec which can be used to encode and decode
- chunks during storage and retrieval.
-
-* Support for filters (sequence of zero or more codecs applied prior
- to compressor during encoding) has been moved following
- [discussion](https://github.com/zarr-developers/zarr-specs/issues/38)
- to a [protocol
- extension](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/extensions/filters/v1.0.html).
-
-* N.B., codecs may be lossy, i.e., decode(encode(x)) round trip
- doesn't have to be perfect, but must preserve memory layout and data
- type.
-
-===
-
-### Codecs
-
-* Currently, the core protocol spec defines a codec as a pair of
- algorithms (encode and decode) which each operate on a sequence of
- bytes.
-
-* This should probably be modified so that [codecs operate on typed
- memory
- blocks](https://github.com/zarr-developers/zarr-specs/issues/38#issuecomment-500858540),
- i.e., have access to information about data type, item size, chunk
- shape.
-
-===
-
-### How to define a codec?
-
-* Publish a spec which:
-
- * Defines the encode and decode algorithms (or cites some existing
- documents that define them).
-
- * Defines any configuration parameters (e.g., compression level).
-
- * States the codec identifier, which must be a URI that dereferences to
- the codec spec.
-
- * The codec identifier is used in array metadata, see later.
-
-* @@TODO example codec spec
-
-====
-
-### [Core protocol - metadata](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#metadata)
-
-* Each Zarr hierarchy is defined via metadata documents.
-
- * One [bootstrap
- metadata](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#bootstrap-metadata)
- document.
- * Multiple [array
- metadata](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#array-metadata)
- documents.
- * Multiple [group
- metadata](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#group-metadata)
- documents.
-
-* Metadata documents are defined using the JSON type system (objects,
- arrays, strings, numbers).
-
-* Metadata documents are serialised (encoded) for storage.
-
- * Default encoding is JSON, but protocol extensions can define other
- encodings for group and array metadata, see below.
-
-===
-
-### [Array metadata](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#array-metadata) - example
-
-```json
-{
- "shape": [10000, 1000],
- "data_type": "(This example probably needs to be fixed to properly use
-JSON-LD, but hopefully the concept is clear.)
-
-===
-
-### [Group metadata](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#group-metadata) - example
-
-Currently nothing but extensions and attributes:
-
-```json
-{
- "extensions": [],
- "attributes": {
- "spam": "ham",
- "eggs": 42,
- }
-}
-```
-
-===
-
-### [Bootstrap metadata](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#bootstrap-metadata)
-
-* Bootstrap metadata is a new concept, not present in either Zarr v2
- or N5. Why add it?
-
-* Provides metadata about metadata, e.g., what core protocol version
- is being used, what metadata encoding is being used.
-
-* Allows for extensions to be declared that modify metadata and/or
- data layout or encoding or other protocol changes that apply
- globally to the entire hierarchy.
-
- * E.g., a protocol extension for [consolidated
- metadata](https://zarr.readthedocs.io/en/stable/tutorial.html#consolidating-metadata)
- could declare itself here, allowing an implementation to discover
- that consolidated metadata is available.
-
- * E.g., a protocol extension that uses [something other than
- JSON](https://github.com/zarr-developers/zarr-specs/issues/37#issuecomment-499954809)
- for metadata encoding could declare itself here.
-
-===
-
-### Bootstrap metadata - further rationale
-
-* Allows hierarchies to be self-describing (i.e., don't need
- out-of-band information to interpret).
-
-* Allows implementations to provide appropriate error messages when
- unsupported protocol extensions/modifications are being used.
-
-===
-
-### Bootstrap metadata - example
-
-```json
-{
- "zarr_format": "http://purl.org/zarr/spec/protocol/core/3.0",
- "metadata_encoding": "application/json",
- "extensions": [
- {
- "extension": "http://example.org/zarr/extension/foo",
- "must_understand": false,
- "configuration": {
- "foo": "bar"
- }
- }
- ]
-}
-```
-
-====
-
-### Core protocol - stores
-
-* All stores encapsulated by a simple [store interface](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#store-interface).
-
-* The store interface comprises a set of operations involving keys and values.
-
- * A key is an ASCII string (with some restrictions).
- * A value is a sequence of bytes.
-
-* Assume the store holds (key, value) pairs, with only one value for any given key.
-
- * I.e., a store is a mapping from keys to values.
-
-===
-
-### Store interface - capabilities
-
-* If a store is **readable** it implements:
-
- * `get(key) -> value`
-
-* If a store is **writeable** it implements:
-
- * `set(key, value)`
- * `delete(key)`
-
-* If a store is **listable** it implements one or more of:
-
- * `list() -> keys`
- * `list_prefix(prefix) -> keys`
- * `list_dir(prefix) -> (keys, prefixes)`
-
-===
-
-### Store interface - set(key, value)
-
-
-
-N.B., implementing `set()` is optional; if not implemented, store is
-read-only.
-
-===
-
-### Store interface - get(key) → value
-
-
-
-===
-
-### Store interface - list operations
-
-* List operations are required for discovering what groups and arrays
-  are present in a hierarchy.
-
-* List operations are optional; if not implemented, then the
- user/application has to find out what arrays and groups are present
- by some other means (e.g., via a protocol extension such as
- consolidated metadata or [groups listing their
- children](https://github.com/zarr-developers/zarr-specs/issues/15);
- or some out-of-band communication).
-
-* Why are there three different list operations in the store
- interface? More on that later.
-
-===
-
-### [Store implementations](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#store-implementations)
-
-* As with current Zarr v2 and N5 protocols, goal of v3 store interface
- is to enable a range of store implementations using different
- storage technologies:
-
- * Local memory; file systems; zip files; local key-value databases
- (BDB, LMDB, Kyoto/Tokyo, LevelDB, ...); distributed key-value
- databases (e.g., mongo, redis, ...); relational databases; cloud
- object stores (S3, GCS, ABS, ...); ...
-
-* Core protocol does not specify how to implement any of these. Leave
- that for **storage specs**.
-
-===
-
-### Storage specs
-
-* Each storage spec defines how the store interface is implemented in
- some concrete storage system.
-
-* Generally will be obvious, but not always, and worth being explicit.
-
-* E.g., expect there will be a file system storage spec, which defines
- the following mapping:
-
-Operation | Implementation
---- | ---
-`get(key) -> value` | read contents of a file
-`set(key, value)` | write contents of a file
-`list_dir(prefix)` | list contents of directory
-... | ...
-
-====
-
-### Storage protocol
-
-* For each metadata document and chunk, need a unique **storage key**.
-
-* In Zarr v2, storage keys formed like this, e.g., for 2D array at
- path "/foo/bar":
-
- * Array metadata document : "foo/bar/.zarray"
-
- * Chunk at grid index (0, 0) : "foo/bar/0.0"
-
-* Two problems with the Zarr v2 protocol...
-
-===
-
-### Zarr v2 problem 1 - race conditions
-
-* When creating a node at some non-root path, e.g., "/foo/bar", then
- v2 spec says groups MUST be created at all ancestor paths.
-
-* E.g., to create an array at path "/foo/bar", need to first:
-
- * Check if a group exists at path "/foo", if not then create it.
-
- * Check if a group exists at path "/", if not then create it.
-
-===
-
-### Zarr v2 problem 1 - race conditions
-
-* But if multiple arrays are being created in parallel, can lead to
- race conditions.
-
- * E.g., if arrays at "/foo/bar" and "/foo/baz" are being created in
- parallel, can get race conditions checking existence of and
- creating group at paths "/foo" and "/".
-
- * Further information
- [here](https://github.com/zarr-developers/zarr-specs/pull/30#issuecomment-494085722).
-
-* v3 goal: avoid race conditions when creating nodes in parallel.
-
-===
-
-### Zarr v2 problem 2 - inefficient
-
-* Use case: user knows a group exists at path "/foo" and wants to know
- what children it has and for each child whether it's an array or
- sub-group.
-
-* Use case: user wants a complete display of a hierarchy, i.e., some
- kind of tree view of all nodes and their types (array or group).
-
-* With Zarr v2 protocol, both of these require [a
- lot](https://github.com/zarr-developers/zarr-specs/pull/30#issuecomment-492990803)
- of calls to the store interface.
-
-* On high-latency stores (e.g., cloud object stores) this can cause
- very noticeable delays.
-
-* v3 goal: minimise store operations needed to explore/discover hierarchy.
-
-===
-
-### Avoiding race conditions - implicit groups
-
-* Zarr v3 currently avoids race conditions during node creation by
- allowing **implicit groups**.
-
-* E.g., to create an array at path "/foo/bar", only a single store
- operation is required:
-
- * `set(array_metadata_key, encoded_array_metadata)`
-
-* By creating an array at path "/foo/bar", groups at all ancestor
- paths ("/foo", "/") are **implicitly** created.
-
-===
-
-### Hierarchy inconsistency?
-
-* So, what happens if a user tries to do something that breaks the
- hierarchy model?
-
- * E.g., create arrays at paths "/foo" and "/foo/bar"? (So there is
- both an array and an implicit group at path "/foo".)
-
- * E.g., explicitly create both an array and a group at path "/foo"?
-
-* Interesting question! Current v3 spec does not fully address this.
-
-===
-
-### Hierarchy inconsistency?
-
-* My current opinion: it's the user/application's responsibility to
- avoid hierarchy inconsistencies.
-
- * I.e., a Zarr core protocol implementation is **not** required to
- check or enforce hierarchy consistency during node creation
- (because this is hard to implement in a parallel/distributed
- context).
-
-* If an application needs to enforce hierarchy consistency, then it
- could implement some mechanism for synchronising node creation,
- and/or it could implement some mechanism for checking consistency
- after node creation.
-
-===
-
-### Making hierarchy discovery more efficient
-
-* Current v3 resolves this by three means:
-
- 1. Split the [storage key
- space](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#storage-keys),
- so all metadata keys have prefix "meta/" and all chunk keys have
- prefix "data/".
-
- 2. Change the format of metadata keys slightly (see below).
-
- 3. Leverage the fact that many stores, including cloud object
- stores, can support `list_prefix(prefix)` and/or
- `list_dir(prefix)` operations, which natively list all keys with
- a given prefix.
-
-===
-
-### v2/3 comparison - storage keys
-
-Bootstrap metadata ...
-
-Version | Storage key
---- | ---
-v2 | N/A
-v3 | `zarr.json`
-
-===
-
-### v2/3 comparison - storage keys
-
-Array metadata, e.g., for array at path "/foo/bar" ...
-
-Version | Storage key
---- | ---
-v2 | `foo/bar/.zarray`
-v3 | `meta/root/foo/bar.array`
-
-===
-
-### v2/3 comparison - storage keys
-
-Group metadata, e.g., for group at path "/foo/baz" ...
-
-Version | Storage key
---- | ---
-v2 | `foo/baz/.zgroup`
-v3 | `meta/root/foo/baz.group`
-
-===
-
-### v2/3 comparison - storage keys
-
-Chunk data, e.g., for chunk at grid index (0, 0) in 2D array at path "/foo/bar" ...
-
-Version | Storage key
---- | ---
-v2 | `foo/bar/0.0`
-v3 | `data/foo/bar/0.0`
-v3* | `data/foo/bar/0/0`
-
-\* chunk key format can be configured in array metadata
-
-====
-
-### [Core protocol](https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#protocol-operations) - examples
-
-* Initialize a hierarchy:
-
- * Perform `set("zarr.json", value)` where `value` is a serialised
- bootstrap metadata document.
-
-===
-
-### Core protocol - examples
-
-* Create an array at path "/foo/bar":
-
- * Perform `set("meta/root/foo/bar.array", value)` where `value` is a
- serialised array metadata document.
-
-===
-
-### Core protocol - examples
-
-* Create a group at path "/foo/baz":
-
- * Perform `set("meta/root/foo/baz.group", value)` where `value` is a
- serialised group metadata document.
-
-===
-
-### Core protocol - examples
-
-* List children of group "/foo":
-
- * Perform `list_dir("meta/root/foo/") -> (keys, prefixes)`.
-
- * Any returned key ending in ".array" indicates a child array.
-
- * Any returned key ending in ".group" indicates an explicit child
- group.
-
- * Any returned prefix indicates a child group implied by some
- descendant.
-
-===
-
-### Core protocol - examples
-
-* E.g., find all nodes in the hierarchy:
-
- * Perform `list_prefix("meta/") -> keys`.
-
- * All nodes (including implicit groups) and their types (either
- array or group) can be inferred from the returned keys.
-
-===
-
-### Core protocol - examples
-
-N.B., all of the above examples **required only a single store
-operation**. This is both a significant simplification and efficiency
-improvement over Zarr v2.
-
-====
-
-### Discussion
-
-* That's it for now.
-
-* Comments/questions/discussion welcome via
- [zarr-specs#16](https://github.com/zarr-developers/zarr-specs/pull/16).
-
-* Also please feel free to [raise an
- issue](https://github.com/zarr-developers/zarr-specs/issues/new) on
- the zarr-specs repo, adding the "core-protocol-v3.0" label.
-
-