diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
index 9b9932067..287409632 100644
--- a/.github/workflows/ci-cd.yml
+++ b/.github/workflows/ci-cd.yml
@@ -62,7 +62,7 @@ jobs:
services:
mysql: &db-service
- image: mysql:8.0
+ image: mysql:8.4.2 # please keep the image in sync with Dockerfile.db
env:
MYSQL_ROOT_HOST: '%'
MYSQL_ROOT_PASSWORD: 'root'
@@ -80,7 +80,13 @@ jobs:
- name: Setup dependencies
run: |
sudo apt-get -qq update
- sudo apt-get -yqq install libmariadb-dev libmagickwand-dev
+ sudo apt-get -yqq install libmariadb-dev \
+ libmagickwand-dev \
+ mariadb-client
+ - name: Report database client versions
+ run: |
+ mysql --version
+ mysqldump --version
- name: Setup Ruby
uses: ruby/setup-ruby@v1
with:
diff --git a/INSTALLATION.md b/INSTALLATION.md
index 703662612..db719de5a 100644
--- a/INSTALLATION.md
+++ b/INSTALLATION.md
@@ -131,6 +131,7 @@ the MySQL server with `sudo mysql -u root` and create a new database user for QP
CREATE USER qpixel@localhost IDENTIFIED BY 'choose_a_password_here';
GRANT ALL ON qpixel_dev.* TO qpixel@localhost;
GRANT ALL ON qpixel_test.* TO qpixel@localhost;
+GRANT ALL ON qpixel_dump.* TO qpixel@localhost;
GRANT ALL ON qpixel.* TO qpixel@localhost;
```
diff --git a/app/controllers/dumps_controller.rb b/app/controllers/dumps_controller.rb
new file mode 100644
index 000000000..41b461020
--- /dev/null
+++ b/app/controllers/dumps_controller.rb
@@ -0,0 +1,8 @@
+class DumpsController < ApplicationController
+ before_action :authenticate_user!
+
+ def index
+ @latest = Dump.automatic.last
+ @others = Dump.manual
+ end
+end
diff --git a/app/helpers/dumps_helper.rb b/app/helpers/dumps_helper.rb
new file mode 100644
index 000000000..d71fdb018
--- /dev/null
+++ b/app/helpers/dumps_helper.rb
@@ -0,0 +1,2 @@
+module DumpsHelper
+end
diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb
new file mode 100644
index 000000000..c425af480
--- /dev/null
+++ b/app/jobs/data_dump_job.rb
@@ -0,0 +1,92 @@
+class DataDumpJob < ApplicationJob
+ queue_as :default
+
+ def perform(drop_db_after: true)
+ permitted = YAML.safe_load_file(Rails.root.join('db/scripts/dump_permitted_columns.yml'))
+ logger.info "Found #{permitted&.size} tables to dump."
+
+ begin
+ exec('SET FOREIGN_KEY_CHECKS = 0;')
+ exec('DROP DATABASE IF EXISTS qpixel_dump;')
+ exec('CREATE DATABASE qpixel_dump;')
+
+ @db_creds = Rails.configuration.database_configuration[Rails.env]
+ @username = @db_creds['username']
+ @password = @db_creds['password']
+ @database = @db_creds['database']
+ @port = @db_creds['port']
+ @host = @db_creds['host']
+
+ mysqldump_command = build_command('mysqldump', '-h', @host, '-u', @username, "-p#{@password}", '-d', @database,
+ '--no-tablespaces', "--port=#{@port}", ssl_state)
+ mysql_command = build_command('mysql', '-h', @host, '-u', @username, "-p#{@password}", "--port=#{@port}",
+ '-D', 'qpixel_dump', ssl_state)
+ logger.debug 'Running system command:'
+ logger.debug "#{mysqldump_command} | #{mysql_command}"
+ copy_success = system("#{mysqldump_command} | #{mysql_command}")
+
+ unless copy_success
+ logger.fatal "Couldn't replicate database: nonzero exit code"
+ return
+ end
+
+ logger.info 'Copied database structure.'
+
+ permitted&.each do |table, data|
+ migrate_table(table, data)
+ end
+
+ logger.info 'Migrated data.'
+
+ file_path = Rails.root.join('tmp/qpixel_export.sql')
+ export_cmd = build_command('mysqldump', '-h', @host, '-u', @username, "-p#{@password}", "--port=#{@port}",
+ 'qpixel_dump', '--no-tablespaces', ssl_state, '>', file_path)
+ logger.debug 'Running system command:'
+ logger.debug export_cmd
+ export_success = system(export_cmd)
+
+ unless export_success
+ logger.fatal "Couldn't export database: nonzero exit code"
+ return
+ end
+
+ logger.info 'Exported database.'
+
+ dump = Dump.create(title: "Data Dump #{Time.now.strftime('%Y-%m-%d')}",
+ comment: "Automatically generated data dump as of #{Time.now.strftime('%Y-%m-%d %H:%M:%S')}.",
+ file: File.open(file_path),
+ automatic: true)
+ Dump.where(automatic: true).where.not(id: dump.id).destroy_all
+ rescue ActiveRecord::ConnectionFailed
+ logger.fatal "Couldn't connect to database. Have you run `GRANT ALL ON qpixel_dump.*` for your DB user?"
+ ensure
+ exec('SET FOREIGN_KEY_CHECKS = 1;')
+ if drop_db_after
+ exec('DROP DATABASE qpixel_dump;')
+ end
+ end
+ end
+
+ private
+
+ def migrate_table(table, data)
+ columns = data['columns']
+ query = data['query']
+ select = "(SELECT #{columns.map { |c| "`#{table}`.`#{c}`" }.join(', ')} FROM #{@database}.#{table} #{query})"
+ full_query = "INSERT INTO qpixel_dump.`#{table}` (#{columns.map { |c| "`#{c}`" }.join(', ')}) #{select}"
+ logger.debug full_query
+ exec(full_query)
+ end
+
+ def exec(sql)
+ ApplicationRecord.connection.execute(sql)
+ end
+
+ def build_command(cmd, *args)
+ "#{cmd} #{args.compact_blank.join(' ')}"
+ end
+
+ def ssl_state
+ '--skip-ssl' if Rails.env.development? || Rails.env.test?
+ end
+end
diff --git a/app/models/dump.rb b/app/models/dump.rb
new file mode 100644
index 000000000..8d60eee3b
--- /dev/null
+++ b/app/models/dump.rb
@@ -0,0 +1,14 @@
+class Dump < ApplicationRecord
+ has_one_attached :file
+
+ before_destroy :delete_file
+
+ scope :automatic, -> { where(automatic: true) }
+ scope :manual, -> { where(automatic: false) }
+
+ private
+
+ def delete_file
+ file.purge
+ end
+end
diff --git a/app/views/dumps/index.html.erb b/app/views/dumps/index.html.erb
new file mode 100644
index 000000000..bf73f07e1
--- /dev/null
+++ b/app/views/dumps/index.html.erb
@@ -0,0 +1,70 @@
+
Data Dumps
+
+
+ Data from all <%= SiteSetting['NetworkName'] %> communities is made available in database format for download here.
+ This data is a weekly export of the entire database, minus any personally identifiable information, moderation data,
+ and some other sensitive information such as who cast votes.
+
+
+
+
+
+ Licensing
+
+
+ This data is provided free of charge as part of our contribution to the commons. If you use post content, you must
+ still abide by the terms of the licenses set by the author of each post.
+
+
+
+Latest data dump
+<% if @latest.nil? %>
+ No data dumps available yet. Check back next week.
+<% else %>
+
+
+
+ |
+ Filename |
+ Created |
+ Download |
+
+
+
+
+ | <%= @latest.title %> |
+ <%= @latest.file.filename %> |
+ <%= @latest.created_at.strftime('%Y-%m-%d') %> |
+ <%= link_to 'Download', rails_blob_path(@latest.file, disposition: 'attachment') %> |
+
+
+
+<% end %>
+
+<% if @others.any? %>
+ Other data dumps
+
+
+
+ |
+ Created |
+ Download |
+
+
+
+ <% @others.each do |dump| %>
+
+ | <%= dump.title %> |
+ <%= dump.created_at.strftime('%Y-%m-%d') %> |
+
+ <% if dump.file.attached? %>
+ <%= link_to 'Download', rails_blob_path(dump.file, disposition: 'attachment') %>
+ <% elsif dump.link.present? %>
+ <%= link_to 'View', dump.link %>
+ <% end %>
+ |
+
+ <% end %>
+
+
+<% end %>
diff --git a/app/views/layouts/_footer.html.erb b/app/views/layouts/_footer.html.erb
index 53cbcd0f9..75eab99e1 100644
--- a/app/views/layouts/_footer.html.erb
+++ b/app/views/layouts/_footer.html.erb
@@ -7,6 +7,7 @@
<%= link_to 'About Us', '/policy/network-faq' %>
<%= link_to 'Privacy & Safety Center', safety_center_url %>
<%= link_to 'Report harmful content', new_complaint_path %>
+ <%= link_to 'Data dumps', data_dumps_path %>
diff --git a/config/environments/test.rb b/config/environments/test.rb
index feb02ce6a..6e7e78d81 100644
--- a/config/environments/test.rb
+++ b/config/environments/test.rb
@@ -13,7 +13,7 @@
config.cache_classes = false
config.action_view.cache_template_loading = true
- config.log_level = :info
+ config.log_level = ENV['LOG_LEVEL'] || :info
# Do not eager load code on boot. This avoids loading your whole application
# just for the purpose of running a single test. If you are using a tool that
diff --git a/config/routes.rb b/config/routes.rb
index fc4c5ca46..6f4220c4a 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -407,4 +407,8 @@
scope 'network' do
root to: 'fake_community#communities', as: :fc_communities
end
+
+ scope 'data' do
+ root to: 'dumps#index', as: :data_dumps
+ end
end
diff --git a/config/schedule.rb b/config/schedule.rb
index d8472d7b7..d8ffb32de 100644
--- a/config/schedule.rb
+++ b/config/schedule.rb
@@ -34,6 +34,10 @@
runner 'scripts/run_new_thread_followers_cleanup.rb'
end
+every 7.days, at: '04:00' do
+ runner 'scripts/data_dump.rb'
+end
+
every 6.hours do
runner 'scripts/recalc_abilities.rb'
end
diff --git a/db/migrate/20260120032240_create_dumps.rb b/db/migrate/20260120032240_create_dumps.rb
new file mode 100644
index 000000000..6e0053bf9
--- /dev/null
+++ b/db/migrate/20260120032240_create_dumps.rb
@@ -0,0 +1,10 @@
+class CreateDumps < ActiveRecord::Migration[7.2]
+  def change
+    create_table :dumps do |t|
+      t.string :title, null: false # every dump must have a title
+      t.string :comment # optional free-text description
+
+      t.timestamps
+    end
+  end
+end
diff --git a/db/migrate/20260513135320_add_defaults_for_data_dumps.rb b/db/migrate/20260513135320_add_defaults_for_data_dumps.rb
new file mode 100644
index 000000000..5b2014b40
--- /dev/null
+++ b/db/migrate/20260513135320_add_defaults_for_data_dumps.rb
@@ -0,0 +1,12 @@
+class AddDefaultsForDataDumps < ActiveRecord::Migration[7.2]
+ def change
+ # Add a bunch of default values for NOT NULL columns that don't already have them, so that data dumps don't break
+ # when the data in these columns isn't included. Only applies to NOT NULL columns that are NOT included in the dump,
+ # and do not already have a default value.
+ change_column_default :filters, :user_id, -1
+ change_column_default :flags, :escalated, false
+ change_column_default :users, :sign_in_count, 0
+ change_column_default :users, :failed_attempts, 0
+ change_column_default :users, :deleted, false
+ end
+end
diff --git a/db/migrate/20260513160917_add_automatic_to_dumps.rb b/db/migrate/20260513160917_add_automatic_to_dumps.rb
new file mode 100644
index 000000000..c5b5f9b32
--- /dev/null
+++ b/db/migrate/20260513160917_add_automatic_to_dumps.rb
@@ -0,0 +1,5 @@
+class AddAutomaticToDumps < ActiveRecord::Migration[7.2]
+  def change
+    add_column :dumps, :automatic, :boolean, null: false, default: false # DataDumpJob creates dumps with true
+  end
+end
diff --git a/db/migrate/20260513185013_add_link_to_dumps.rb b/db/migrate/20260513185013_add_link_to_dumps.rb
new file mode 100644
index 000000000..d74d7c0b8
--- /dev/null
+++ b/db/migrate/20260513185013_add_link_to_dumps.rb
@@ -0,0 +1,5 @@
+class AddLinkToDumps < ActiveRecord::Migration[7.2]
+  def change
+    add_column :dumps, :link, :string # shown as a 'View' link when a dump has no attached file
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 9aab26649..e66a38914 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[7.2].define(version: 2026_03_22_151439) do
+ActiveRecord::Schema[7.2].define(version: 2026_05_13_185013) do
create_table "abilities", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
t.bigint "community_id"
t.string "name"
@@ -268,6 +268,15 @@
t.index ["user_id"], name: "index_complaints_on_user_id"
end
+ create_table "dumps", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
+ t.string "title", null: false
+ t.string "comment"
+ t.datetime "created_at", null: false
+ t.datetime "updated_at", null: false
+ t.boolean "automatic", default: false, null: false
+ t.string "link"
+ end
+
create_table "email_logs", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
t.string "log_type"
t.string "destination"
@@ -294,7 +303,7 @@
end
create_table "filters", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
- t.bigint "user_id", null: false
+ t.bigint "user_id", default: -1, null: false
t.string "name", null: false
t.float "min_score"
t.float "max_score"
diff --git a/db/scripts/dump_permitted_columns.yml b/db/scripts/dump_permitted_columns.yml
new file mode 100644
index 000000000..a425ce250
--- /dev/null
+++ b/db/scripts/dump_permitted_columns.yml
@@ -0,0 +1,463 @@
+abilities:
+ columns:
+ - id
+ - community_id
+ - name
+ - description
+ - internal_id
+ - icon
+ - post_score_threshold
+ - edit_score_threshold
+ - flag_score_threshold
+ - created_at
+ - updated_at
+ - summary
+
+categories:
+ columns:
+ - id
+ - name
+ - short_wiki
+ - community_id
+ - created_at
+ - updated_at
+ - display_post_types
+ - is_homepage
+ - tag_set_id
+ - min_trust_level
+ - button_text
+ - color_code
+ - asking_guidance_override
+ - answering_guidance_override
+ - min_view_trust_level
+ - license_id
+ - sequence
+ - use_for_hot_posts
+ - use_for_advertisement
+ - min_title_length
+ - min_body_length
+ - default_filter_id
+ query: >
+ WHERE min_view_trust_level IS NULL OR min_view_trust_level = 0
+
+categories_moderator_tags:
+ columns:
+ - category_id
+ - tag_id
+ query: >
+ INNER JOIN categories c ON c.id = categories_moderator_tags.category_id
+ WHERE c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0
+
+categories_post_types:
+ columns:
+ - id
+ - category_id
+ - post_type_id
+ - upvote_rep
+ - downvote_rep
+ query: >
+ INNER JOIN categories c ON c.id = categories_post_types.category_id
+ WHERE c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0
+
+categories_required_tags:
+ columns:
+ - category_id
+ - tag_id
+ query: >
+ INNER JOIN categories c ON c.id = categories_required_tags.category_id
+ WHERE c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0
+
+categories_topic_tags:
+ columns:
+ - category_id
+ - tag_id
+ query: >
+ INNER JOIN categories c ON c.id = categories_topic_tags.category_id
+ WHERE c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0
+
+close_reasons:
+ columns:
+ - id
+ - name
+ - description
+ - active
+ - requires_other_post
+ - community_id
+
+comment_threads:
+ columns:
+ - id
+ - title
+ - reply_count
+ - post_id
+ - locked
+ - locked_by_id
+ - locked_until
+ - archived
+ - archived_by_id
+ - ever_archived_before
+ - deleted
+ - deleted_by_id
+ - created_at
+ - updated_at
+ - community_id
+ - locked_at
+ - last_activity_at
+ query: >
+ WHERE deleted = FALSE
+
+comments:
+ columns:
+ - id
+ - created_at
+ - updated_at
+ - post_id
+ - content
+ - deleted
+ - user_id
+ - community_id
+ - comment_thread_id
+ - has_reference
+ - reference_text
+ - references_comment_id
+ - last_activity_at
+ query: >
+ WHERE deleted = FALSE
+
+communities:
+ columns:
+ - id
+ - name
+ - host
+ - created_at
+ - updated_at
+ - is_fake
+ - hidden
+ query: >
+ WHERE is_fake = FALSE AND hidden = FALSE
+
+community_users:
+ columns:
+ - id
+ - community_id
+ - user_id
+ - is_moderator
+ - is_admin
+ - reputation
+ - created_at
+ - updated_at
+ - trust_level
+ - deleted
+ - deleted_at
+ - deleted_by_id
+ - post_count
+ query: >
+ WHERE deleted = FALSE
+
+filters:
+ columns:
+ - id
+ - name
+ - min_score
+ - max_score
+ - min_answers
+ - max_answers
+ - status
+ - include_tags
+ - exclude_tags
+ - created_at
+ - updated_at
+ - source
+
+flags:
+ columns:
+ - id
+ - created_at
+ - updated_at
+ - post_id
+ - status
+ - community_id
+ - post_flag_type_id
+ - post_type
+ query: >
+ INNER JOIN post_flag_types pft ON pft.id = flags.post_flag_type_id
+ WHERE pft.confidential = FALSE
+
+licenses:
+ columns:
+ - id
+ - name
+ - url
+ - default
+ - community_id
+ - created_at
+ - updated_at
+ - enabled
+ - description
+
+pinned_links:
+ columns:
+ - id
+ - community_id
+ - label
+ - link
+ - post_id
+ - active
+ - shown_after
+ - shown_before
+ - created_at
+ - updated_at
+
+post_flag_types:
+ columns:
+ - id
+ - community_id
+ - name
+ - description
+ - confidential
+ - active
+ - post_type_id
+ - created_at
+ - updated_at
+ - requires_details
+
+post_histories:
+ columns:
+ - id
+ - post_history_type_id
+ - user_id
+ - created_at
+ - updated_at
+ - post_id
+ - before_state
+ - after_state
+ - comment
+ - community_id
+ - before_title
+ - after_title
+ - hidden
+ query: >
+ WHERE hidden = FALSE
+
+post_history_tags:
+ columns:
+ - id
+ - post_history_id
+ - tag_id
+ - relationship
+ - created_at
+ - updated_at
+
+post_history_types:
+ columns:
+ - id
+ - name
+ - description
+ - created_at
+ - updated_at
+
+post_types:
+ columns:
+ - id
+ - name
+ - description
+ - has_answers
+ - has_votes
+ - has_tags
+ - has_parent
+ - has_category
+ - has_license
+ - is_public_editable
+ - is_closeable
+ - is_top_level
+ - is_freely_editable
+ - icon_name
+ - has_reactions
+ - answer_type_id
+ - has_only_specific_reactions
+
+posts:
+ columns:
+ - id
+ - title
+ - body
+ - tags_cache
+ - score
+ - parent_id
+ - user_id
+ - closed
+ - closed_by_id
+ - closed_at
+ - deleted
+ - deleted_by_id
+ - deleted_at
+ - created_at
+ - updated_at
+ - post_type_id
+ - body_markdown
+ - answer_count
+ - last_activity
+ - att_source
+ - att_license_name
+ - att_license_link
+ - doc_slug
+ - last_activity_by_id
+ - community_id
+ - close_reason_id
+ - duplicate_post_id
+ - category_id
+ - license_id
+ - help_category
+ - help_ordering
+ - upvote_count
+ - downvote_count
+ - comments_disabled
+ - last_edited_at
+ - last_edited_by_id
+ - locked
+ - locked_by_id
+ - locked_at
+ - locked_until
+ query: >
+ INNER JOIN categories c ON c.id = posts.category_id
+ WHERE posts.deleted = FALSE AND posts.doc_slug IS NULL
+ AND (c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0)
+
+posts_tags:
+ columns:
+ - tag_id
+ - post_id
+
+reaction_types:
+ columns:
+ - id
+ - name
+ - description
+ - on_post_label
+ - icon
+ - color
+ - requires_comment
+ - community_id
+ - position
+ - created_at
+ - updated_at
+ - active
+ - post_type_id
+
+reactions:
+ columns:
+ - id
+ - user_id
+ - reaction_type_id
+ - post_id
+ - comment_id
+ - created_at
+ - updated_at
+
+suggested_edits:
+ columns:
+ - id
+ - post_id
+ - user_id
+ - community_id
+ - body
+ - title
+ - tags_cache
+ - body_markdown
+ - comment
+ - active
+ - accepted
+ - decided_at
+ - decided_by_id
+ - rejected_comment
+ - created_at
+ - updated_at
+ - before_title
+ - before_body
+ - before_body_markdown
+ - before_tags_cache
+
+suggested_edits_before_tags:
+ columns:
+ - suggested_edit_id
+ - tag_id
+
+suggested_edits_tags:
+ columns:
+ - suggested_edit_id
+ - tag_id
+
+tag_sets:
+ columns:
+ - id
+ - name
+ - community_id
+ - created_at
+ - updated_at
+
+tag_synonyms:
+ columns:
+ - id
+ - tag_id
+ - name
+ - created_at
+ - updated_at
+
+tags:
+ columns:
+ - id
+ - name
+ - created_at
+ - updated_at
+ - community_id
+ - tag_set_id
+ - wiki_markdown
+ - wiki
+ - excerpt
+ - parent_id
+
+user_abilities:
+ columns:
+ - id
+ - community_user_id
+ - ability_id
+ - is_suspended
+ - suspension_end
+ - suspension_message
+ - created_at
+ - updated_at
+ query: >
+ WHERE is_suspended = FALSE
+
+user_websites:
+ columns:
+ - id
+ - label
+ - url
+ - position
+ - user_id
+
+users:
+ columns:
+ - id
+ - created_at
+ - updated_at
+ - is_global_moderator
+ - is_global_admin
+ - username
+ - profile
+ - profile_markdown
+ - staff
+ - trust_level
+ query: >
+ WHERE deleted = FALSE
+
+votes:
+ columns:
+ - id
+ - vote_type
+ - created_at
+ - updated_at
+ - post_id
+ - recv_user_id
+ - community_id
diff --git a/docker-compose.yml b/docker-compose.yml
index 556c4b818..846523ef2 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,10 +5,10 @@ services:
context: "."
dockerfile: docker/Dockerfile.db
volumes:
+ - ./docker/db-entrypoint.sh:/docker/entrypoint.sh
- ./docker/mysql:/var/lib/mysql
env_file:
- ${ENV_FILE_LOCATION}
- command: mysqld --mysql-native-password=on --skip-mysqlx
cap_add:
- SYS_NICE
healthcheck:
diff --git a/docker/Dockerfile.db b/docker/Dockerfile.db
index 41875ec29..bc9a17213 100644
--- a/docker/Dockerfile.db
+++ b/docker/Dockerfile.db
@@ -1,6 +1,7 @@
FROM mysql:8.4.2
-# docker build -t qpixel_db -f docker/Dockerfile.db .
-
# These commands will be run on init of the container
COPY docker/mysql-init.sql /docker-entrypoint-initdb.d/mysql-init.sql
+
+ENTRYPOINT ["/bin/bash"]
+CMD ["/docker/entrypoint.sh"]
diff --git a/docker/db-entrypoint.sh b/docker/db-entrypoint.sh
new file mode 100644
index 000000000..5c7885682
--- /dev/null
+++ b/docker/db-entrypoint.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+mysqld --init-file=/docker-entrypoint-initdb.d/mysql-init.sql \
+ --mysql-native-password=on \
+ --skip-mysqlx
diff --git a/docker/mysql-init.sql b/docker/mysql-init.sql
index 510059bb9..521fda2bf 100644
--- a/docker/mysql-init.sql
+++ b/docker/mysql-init.sql
@@ -3,4 +3,5 @@
if you change your environment file, you need to update database names here */
GRANT ALL ON qpixel_dev.* TO qpixel;
GRANT ALL ON qpixel_test.* TO qpixel;
+GRANT ALL ON qpixel_dump.* TO qpixel;
GRANT ALL ON qpixel.* TO qpixel;
diff --git a/scripts/data_dump.rb b/scripts/data_dump.rb
new file mode 100644
index 000000000..c37413ce7
--- /dev/null
+++ b/scripts/data_dump.rb
@@ -0,0 +1 @@
+DataDumpJob.perform_later # enqueue the data dump; invoked by the `runner` entry in config/schedule.rb
diff --git a/test/controllers/dumps_controller_test.rb b/test/controllers/dumps_controller_test.rb
new file mode 100644
index 000000000..957059ef6
--- /dev/null
+++ b/test/controllers/dumps_controller_test.rb
@@ -0,0 +1,7 @@
+require 'test_helper'
+
+class DumpsControllerTest < ActionDispatch::IntegrationTest
+  # TODO: no coverage yet. Suggested cases for GET data_dumps_path: a signed-out request is rejected by
+  # authenticate_user! (presumably a redirect to sign-in; confirm), and a signed-in request lists the
+  # latest automatic dump plus all manual dumps, each linking to its own file.
+end
diff --git a/test/fixtures/dumps.yml b/test/fixtures/dumps.yml
new file mode 100644
index 000000000..f6a49061e
--- /dev/null
+++ b/test/fixtures/dumps.yml
@@ -0,0 +1,8 @@
+# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
+
+without_comment:
+ title: Data dump without a comment
+
+with_comment:
+ title: Data dump with comment
+ comment: we decided to include a helpful comment this time
diff --git a/test/jobs/data_dump_job_test.rb b/test/jobs/data_dump_job_test.rb
new file mode 100644
index 000000000..ea36b10c4
--- /dev/null
+++ b/test/jobs/data_dump_job_test.rb
@@ -0,0 +1,38 @@
+require 'test_helper'
+
+class DataDumpJobTest < ActiveJob::TestCase
+ self.use_transactional_tests = false
+
+ test 'job runs successfully' do
+ perform_enqueued_jobs do
+ DataDumpJob.perform_later
+ end
+ end
+
+ test 'no excluded data present in final dump DB' do
+ perform_enqueued_jobs do
+ DataDumpJob.perform_later(drop_db_after: false)
+ end
+
+ conn = ApplicationRecord.connection
+ all_columns = conn.tables.to_h { |t| [t, conn.columns(t).map(&:name)] }
+ permitted = YAML.safe_load_file(Rails.root.join('db/scripts/dump_permitted_columns.yml'))
+ excluded_cols = all_columns.to_h do |t, cols|
+ [
+ t,
+ cols.reject { |c| permitted.include?(t) && permitted[t]['columns'].include?(c) }
+ ]
+ end
+ excluded_cols = excluded_cols.reject { |_t, cols| cols.empty? }
+
+ excluded_cols.each do |table, cols|
+ query = "SELECT #{cols.map { |c| "`#{c}`" }.join(', ')} FROM qpixel_dump.`#{table}`"
+ results = conn.execute(query).to_a
+ results.transpose.each.with_index do |col, i|
+ # EITHER all values in the column should be nil, OR all values in the column should be identical (which implies
+ # a default value was applied), for us to be happy that there is no true data in the column.
+ assert col.all?(&:nil?) || col.uniq.size <= 1, "Distinct non-null data present in column `#{table}`.`#{cols[i]}`"
+ end
+ end
+ end
+end
diff --git a/test/models/dump_test.rb b/test/models/dump_test.rb
new file mode 100644
index 000000000..3c9dab9a3
--- /dev/null
+++ b/test/models/dump_test.rb
@@ -0,0 +1,7 @@
+require 'test_helper'
+
+class DumpTest < ActiveSupport::TestCase
+  # TODO: no coverage yet. Suggested cases: the `automatic` and `manual` scopes split records on the
+  # `automatic` flag, a dump without a title cannot be saved (NOT NULL column), and destroying a dump
+  # purges its attached file.
+end
diff --git a/test/test_helper.rb b/test/test_helper.rb
index 7befbb484..fa985b0a9 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -1,3 +1,6 @@
+# Silence the irritating already initialized constant warning
+$VERBOSE = nil
+
require 'simplecov'
require 'simplecov_json_formatter'
SimpleCov.formatter = SimpleCov::Formatter::JSONFormatter