From 58b2ea5abef8ad9c2c809699f23f20a38e77ed25 Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 20 Jan 2026 06:23:26 +0300 Subject: [PATCH 01/36] initial boilerplate for data dumps (mirror Metasmoke) --- app/controllers/dumps_controller.rb | 2 ++ app/helpers/dumps_helper.rb | 2 ++ app/views/dumps/index.htrml.erb | 0 db/migrate/20260120032240_create_dumps.rb | 7 +++++++ test/controllers/dumps_controller_test.rb | 7 +++++++ 5 files changed, 18 insertions(+) create mode 100644 app/controllers/dumps_controller.rb create mode 100644 app/helpers/dumps_helper.rb create mode 100644 app/views/dumps/index.htrml.erb create mode 100644 db/migrate/20260120032240_create_dumps.rb create mode 100644 test/controllers/dumps_controller_test.rb diff --git a/app/controllers/dumps_controller.rb b/app/controllers/dumps_controller.rb new file mode 100644 index 000000000..80f2b2669 --- /dev/null +++ b/app/controllers/dumps_controller.rb @@ -0,0 +1,2 @@ +class DumpsController < ApplicationController +end diff --git a/app/helpers/dumps_helper.rb b/app/helpers/dumps_helper.rb new file mode 100644 index 000000000..d71fdb018 --- /dev/null +++ b/app/helpers/dumps_helper.rb @@ -0,0 +1,2 @@ +module DumpsHelper +end diff --git a/app/views/dumps/index.htrml.erb b/app/views/dumps/index.htrml.erb new file mode 100644 index 000000000..e69de29bb diff --git a/db/migrate/20260120032240_create_dumps.rb b/db/migrate/20260120032240_create_dumps.rb new file mode 100644 index 000000000..e657514e4 --- /dev/null +++ b/db/migrate/20260120032240_create_dumps.rb @@ -0,0 +1,7 @@ +class CreateDumps < ActiveRecord::Migration[7.2] + def change + create_table :dumps do |t| + t.timestamps + end + end +end diff --git a/test/controllers/dumps_controller_test.rb b/test/controllers/dumps_controller_test.rb new file mode 100644 index 000000000..957059ef6 --- /dev/null +++ b/test/controllers/dumps_controller_test.rb @@ -0,0 +1,7 @@ +require 'test_helper' + +class DumpsControllerTest < ActionDispatch::IntegrationTest + # 
test "the truth" do + # assert true + # end +end From 88fd12def355ada55a0252d39fe9079aca482eed Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 20 Jan 2026 08:31:06 +0300 Subject: [PATCH 02/36] dups should have a required title & optional comment --- db/migrate/20260120032240_create_dumps.rb | 3 +++ db/schema.rb | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/db/migrate/20260120032240_create_dumps.rb b/db/migrate/20260120032240_create_dumps.rb index e657514e4..6e0053bf9 100644 --- a/db/migrate/20260120032240_create_dumps.rb +++ b/db/migrate/20260120032240_create_dumps.rb @@ -1,6 +1,9 @@ class CreateDumps < ActiveRecord::Migration[7.2] def change create_table :dumps do |t| + t.string :title, null: false + t.string :comment + t.timestamps end end diff --git a/db/schema.rb b/db/schema.rb index 07e7a08a1..aa124a96e 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.2].define(version: 2025_12_21_142105) do +ActiveRecord::Schema[7.2].define(version: 2026_01_20_032240) do create_table "abilities", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.bigint "community_id" t.string "name" @@ -267,6 +267,13 @@ t.index ["user_id"], name: "index_complaints_on_user_id" end + create_table "dumps", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t| + t.string "title", null: false + t.string "comment" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + create_table "email_logs", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t| t.string "log_type" t.string "destination" From 59978295422fcdfb47714a4a9faa9fe3b6dc97fb Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 20 Jan 2026 08:31:53 +0300 Subject: [PATCH 03/36] initial boilerplate for data dump model --- app/models/dump.rb | 3 +++ test/fixtures/dumps.yml | 11 +++++++++++ test/models/dump_test.rb | 7 +++++++ 3 files changed, 21 insertions(+) create mode 100644 app/models/dump.rb create mode 100644 test/fixtures/dumps.yml create mode 100644 test/models/dump_test.rb diff --git a/app/models/dump.rb b/app/models/dump.rb new file mode 100644 index 000000000..10ac3960e --- /dev/null +++ b/app/models/dump.rb @@ -0,0 +1,3 @@ +class Dump < ApplicationRecord + has_one_attached :file +end diff --git a/test/fixtures/dumps.yml b/test/fixtures/dumps.yml new file mode 100644 index 000000000..d7a332924 --- /dev/null +++ b/test/fixtures/dumps.yml @@ -0,0 +1,11 @@ +# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html + +# This model initially had no columns defined. 
If you add columns to the +# model remove the "{}" from the fixture names and add the columns immediately +# below each fixture, per the syntax in the comments below +# +one: {} +# column: value +# +two: {} +# column: value diff --git a/test/models/dump_test.rb b/test/models/dump_test.rb new file mode 100644 index 000000000..3c9dab9a3 --- /dev/null +++ b/test/models/dump_test.rb @@ -0,0 +1,7 @@ +require 'test_helper' + +class DumpTest < ActiveSupport::TestCase + # test "the truth" do + # assert true + # end +end From 8fca596f8220cc167c73ff01ce625af186f4f7c7 Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 20 Jan 2026 09:01:01 +0300 Subject: [PATCH 04/36] first data dump fixtures (fixing tests) --- test/fixtures/dumps.yml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/test/fixtures/dumps.yml b/test/fixtures/dumps.yml index d7a332924..f6a49061e 100644 --- a/test/fixtures/dumps.yml +++ b/test/fixtures/dumps.yml @@ -1,11 +1,8 @@ # Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html -# This model initially had no columns defined. 
If you add columns to the -# model remove the "{}" from the fixture names and add the columns immediately -# below each fixture, per the syntax in the comments below -# -one: {} -# column: value -# -two: {} -# column: value +without_comment: + title: Data dump without a comment + +with_comment: + title: Data dump with comment + comment: we decided to include a helpful comment this time From efa5fa564f440a2f8797ac430472ab45e4cbca37 Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 20 Jan 2026 06:23:26 +0300 Subject: [PATCH 05/36] initial boilerplate for data dumps (mirror Metasmoke) --- app/controllers/dumps_controller.rb | 2 ++ app/helpers/dumps_helper.rb | 2 ++ app/views/dumps/index.htrml.erb | 0 db/migrate/20260120032240_create_dumps.rb | 7 +++++++ test/controllers/dumps_controller_test.rb | 7 +++++++ 5 files changed, 18 insertions(+) create mode 100644 app/controllers/dumps_controller.rb create mode 100644 app/helpers/dumps_helper.rb create mode 100644 app/views/dumps/index.htrml.erb create mode 100644 db/migrate/20260120032240_create_dumps.rb create mode 100644 test/controllers/dumps_controller_test.rb diff --git a/app/controllers/dumps_controller.rb b/app/controllers/dumps_controller.rb new file mode 100644 index 000000000..80f2b2669 --- /dev/null +++ b/app/controllers/dumps_controller.rb @@ -0,0 +1,2 @@ +class DumpsController < ApplicationController +end diff --git a/app/helpers/dumps_helper.rb b/app/helpers/dumps_helper.rb new file mode 100644 index 000000000..d71fdb018 --- /dev/null +++ b/app/helpers/dumps_helper.rb @@ -0,0 +1,2 @@ +module DumpsHelper +end diff --git a/app/views/dumps/index.htrml.erb b/app/views/dumps/index.htrml.erb new file mode 100644 index 000000000..e69de29bb diff --git a/db/migrate/20260120032240_create_dumps.rb b/db/migrate/20260120032240_create_dumps.rb new file mode 100644 index 000000000..e657514e4 --- /dev/null +++ b/db/migrate/20260120032240_create_dumps.rb @@ -0,0 +1,7 @@ +class CreateDumps < ActiveRecord::Migration[7.2] + 
def change + create_table :dumps do |t| + t.timestamps + end + end +end diff --git a/test/controllers/dumps_controller_test.rb b/test/controllers/dumps_controller_test.rb new file mode 100644 index 000000000..957059ef6 --- /dev/null +++ b/test/controllers/dumps_controller_test.rb @@ -0,0 +1,7 @@ +require 'test_helper' + +class DumpsControllerTest < ActionDispatch::IntegrationTest + # test "the truth" do + # assert true + # end +end From 540e07ccc3b11e666e3a50fde6c8c7ba9ece684e Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 20 Jan 2026 08:31:06 +0300 Subject: [PATCH 06/36] dups should have a required title & optional comment --- db/migrate/20260120032240_create_dumps.rb | 3 +++ db/schema.rb | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/db/migrate/20260120032240_create_dumps.rb b/db/migrate/20260120032240_create_dumps.rb index e657514e4..6e0053bf9 100644 --- a/db/migrate/20260120032240_create_dumps.rb +++ b/db/migrate/20260120032240_create_dumps.rb @@ -1,6 +1,9 @@ class CreateDumps < ActiveRecord::Migration[7.2] def change create_table :dumps do |t| + t.string :title, null: false + t.string :comment + t.timestamps end end diff --git a/db/schema.rb b/db/schema.rb index 9aab26649..7c7abca20 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -268,6 +268,13 @@ t.index ["user_id"], name: "index_complaints_on_user_id" end + create_table "dumps", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t| + t.string "title", null: false + t.string "comment" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + create_table "email_logs", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t| t.string "log_type" t.string "destination" From dec449aaaaf7dcd7c7c1d34888cf249bd5e55d47 Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 20 Jan 2026 08:31:53 +0300 Subject: [PATCH 07/36] initial boilerplate for data dump model --- app/models/dump.rb | 3 +++ test/fixtures/dumps.yml | 11 +++++++++++ 
test/models/dump_test.rb | 7 +++++++ 3 files changed, 21 insertions(+) create mode 100644 app/models/dump.rb create mode 100644 test/fixtures/dumps.yml create mode 100644 test/models/dump_test.rb diff --git a/app/models/dump.rb b/app/models/dump.rb new file mode 100644 index 000000000..10ac3960e --- /dev/null +++ b/app/models/dump.rb @@ -0,0 +1,3 @@ +class Dump < ApplicationRecord + has_one_attached :file +end diff --git a/test/fixtures/dumps.yml b/test/fixtures/dumps.yml new file mode 100644 index 000000000..d7a332924 --- /dev/null +++ b/test/fixtures/dumps.yml @@ -0,0 +1,11 @@ +# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html + +# This model initially had no columns defined. If you add columns to the +# model remove the "{}" from the fixture names and add the columns immediately +# below each fixture, per the syntax in the comments below +# +one: {} +# column: value +# +two: {} +# column: value diff --git a/test/models/dump_test.rb b/test/models/dump_test.rb new file mode 100644 index 000000000..3c9dab9a3 --- /dev/null +++ b/test/models/dump_test.rb @@ -0,0 +1,7 @@ +require 'test_helper' + +class DumpTest < ActiveSupport::TestCase + # test "the truth" do + # assert true + # end +end From bb6d5f809360b988e601993f00dfbfbe247e882f Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 20 Jan 2026 09:01:01 +0300 Subject: [PATCH 08/36] first data dump fixtures (fixing tests) --- test/fixtures/dumps.yml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/test/fixtures/dumps.yml b/test/fixtures/dumps.yml index d7a332924..f6a49061e 100644 --- a/test/fixtures/dumps.yml +++ b/test/fixtures/dumps.yml @@ -1,11 +1,8 @@ # Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html -# This model initially had no columns defined. 
If you add columns to the -# model remove the "{}" from the fixture names and add the columns immediately -# below each fixture, per the syntax in the comments below -# -one: {} -# column: value -# -two: {} -# column: value +without_comment: + title: Data dump without a comment + +with_comment: + title: Data dump with comment + comment: we decided to include a helpful comment this time From 5aba30cdb33cb6727999cb2288a5bb6d0a47ebf3 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Tue, 12 May 2026 21:59:40 +0100 Subject: [PATCH 09/36] Create scaffolding for pulling data --- db/scripts/data_dump.rb | 3 + db/scripts/dump_permitted_columns.yml | 463 ++++++++++++++++++++++++++ 2 files changed, 466 insertions(+) create mode 100644 db/scripts/data_dump.rb create mode 100644 db/scripts/dump_permitted_columns.yml diff --git a/db/scripts/data_dump.rb b/db/scripts/data_dump.rb new file mode 100644 index 000000000..9500587bd --- /dev/null +++ b/db/scripts/data_dump.rb @@ -0,0 +1,3 @@ +permitted = YAML.safe_load(File.read(Rails.root.join('db/scripts/dump_permitted_columns.yml'))) + + diff --git a/db/scripts/dump_permitted_columns.yml b/db/scripts/dump_permitted_columns.yml new file mode 100644 index 000000000..14a2ce5c8 --- /dev/null +++ b/db/scripts/dump_permitted_columns.yml @@ -0,0 +1,463 @@ +abilities: + columns: + - id + - community_id + - name + - description + - internal_id + - icon + - post_score_threshold + - edit_score_threshold + - flag_score_threshold + - created_at + - updated_at + - summary + +categories: + columns: + - id + - name + - short_wiki + - community_id + - created_at + - updated_at + - display_post_types + - is_homepage + - tag_set_id + - min_trust_level + - button_text + - color_code + - asking_guidance_override + - answering_guidance_override + - min_view_trust_level + - license_id + - sequence + - use_for_hot_posts + - use_for_advertisement + - min_title_length + - min_body_length + - default_filter_id + query: > + WHERE min_view_trust_level IS NULL OR 
min_view_trust_level = 0 + +categories_moderator_tags: + columns: + - category_id + - tag_id + query: > + INNER JOIN categories c ON c.id = categories_moderator_tags.category_id + WHERE c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0 + +categories_post_types: + columns: + - id + - category_id + - post_type_id + - upvote_rep + - downvote_rep + query: > + INNER JOIN categories c ON c.id = categories_post_types.category_id + WHERE c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0 + +categories_required_tags: + columns: + - category_id + - tag_id + query: > + INNER JOIN categories c ON c.id = categories_required_tags.category_id + WHERE c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0 + +categories_topic_tags: + columns: + - category_id + - tag_id + query: > + INNER JOIN categories c ON c.id = categories_topic_tags.category_id + WHERE c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0 + +close_reasons: + columns: + - id + - name + - description + - active + - requires_other_post + - community_id + +comment_threads: + columns: + - id + - title + - reply_count + - post_id + - locked + - locked_by_id + - locked_until + - archived + - archived_by_id + - ever_archived_before + - deleted + - deleted_by_id + - created_at + - updated_at + - community_id + - locked_at + - last_activity_at + query: > + WHERE deleted = FALSE + +comments: + columns: + - id + - created_at + - updated_at + - post_id + - content + - deleted + - user_id + - community_id + - comment_thread_id + - has_reference + - reference_text + - references_comment_id + - last_activity_at + query: > + WHERE deleted = FALSE + +communities: + columns: + - id + - name + - host + - created_at + - updated_at + - is_fake + - hidden + query: > + WHERE is_fake = FALSE AND hidden = FALSE + +community_users: + columns: + - id + - community_id + - user_id + - is_moderator + - is_admin + - reputation + - created_at + - updated_at + - trust_level + - deleted + - deleted_at + - 
deleted_by_id + - post_count + query: > + WHERE deleted = FALSE + +filters: + columns: + - id + - name + - min_score + - max_score + - min_answers + - max_answers + - status + - include_tags + - exclude_tags + - created_at + - updated_at + - source + +flags: + columns: + - id + - created_at + - updated_at + - post_id + - status + - community_id + - post_flag_type_id + - post_type + query: > + INNER JOIN post_flag_type pft ON pft.id = flags.post_flag_type_id + WHERE pft.confidential = FALSE + +licenses: + columns: + - id + - name + - url + - default + - community_id + - created_at + - updated_at + - enabled + - description + +pinned_links: + columns: + - id + - community_id + - label + - link + - post_id + - active + - shown_after + - shown_before + - created_at + - updated_at + +post_flag_types: + columns: + - id + - community_id + - name + - description + - confidential + - active + - post_type_id + - created_at + - updated_at + - requires_details + +post_histories: + columns: + - id + - post_history_type_id + - user_id + - created_at + - updated_at + - post_id + - before_state + - after_state + - comment + - community_id + - before_title + - after_title + - hidden + query: > + WHERE hidden = FALSE + +post_history_tags: + columns: + - id + - post_history_id + - tag_id + - relationship + - created_at + - updated_at + +post_history_types: + columns: + - id + - name + - description + - created_at + - updated_at + +post_types: + columns: + - id + - name + - description + - has_answers + - has_votes + - has_tags + - has_parent + - has_category + - has_license + - is_public_editable + - is_closeable + - is_top_level + - is_freely_editable + - icon_name + - has_reactions + - answer_type_id + - has_only_specific_reactions + +posts: + columns: + - id + - title + - body + - tags_cache + - score + - parent_id + - user_id + - closed + - closed_by_id + - closed_at + - deleted + - deleted_by_id + - deleted_at + - created_at + - updated_at + - post_type_id + - body_markdown + - 
answer_count + - last_activity + - att_source + - att_license_name + - att_license_link + - doc_slug + - last_activity_by_id + - community_id + - close_reason_id + - duplicate_post_id + - category_id + - license_id + - help_category + - help_ordering + - upvote_count + - downvote_count + - comments_disabled + - last_edited_at + - last_edited_by_id + - locked + - locked_by_id + - locked_at + - locked_until + query: > + INNER JOIN categories c ON c.id = posts.category_id + WHERE posts.deleted = FALSE AND posts.doc_slug IS NULL + AND (c.min_view_trust_level IS NULL OR c.min_view_trust_level = 0) + +posts_tags: + columns: + - tag_id + - post_id + +reaction_types: + columns: + - id + - name + - description + - on_post_label + - icon + - color + - requires_comment + - community_id + - position + - created_at + - updated_at + - active + - post_type_id + +reactions: + columns: + - id + - user_id + - reaction_type_id + - post_id + - comment_id + - created_at + - updated_at + +suggested_edits: + columns: + - id + - post_id + - user_id + - community_id + - body + - title + - tags_cache + - body_markdown + - comment + - active + - accepted + - decided_at + - decided_by_id + - rejected_comment + - created_at + - updated_at + - before_title + - before_body + - before_body_markdown + - before_tags_cache + +suggested_edits_before_tags: + columns: + - suggested_edit_id + - tag_id + +suggested_edits_tags: + columns: + - suggested_edit_id + - tag_id + +tag_sets: + columns: + - id + - name + - community_id + - created_at + - updated_at + +tag_synonyms: + columns: + - id + - tag_id + - name + - created_at + - updated_at + +tags: + columns: + - id + - name + - created_at + - updated_at + - community_id + - tag_set_id + - wiki_markdown + - wiki + - excerpt + - parent_id + +user_abilities: + columns: + - id + - community_user_id + - ability_id + - is_suspended + - suspension_end + - suspension_message + - created_at + - updated_at + query: > + WHERE is_suspended = FALSE + +user_websites: 
+ columns: + - id + - label + - url + - position + - user_id + +users: + columns: + - id + - created_at + - updated_at + - is_global_moderator + - is_global_admin + - username + - profile + - profile_markdown + - staff + - trust_level + query: > + WHERE deleted = FALSE + +votes: + columns: + - id + - vote_type + - created_at + - updated_at + - post_id + - recv_user_id + - community_id From 96a1b4ddcc155ea8a89cebe5d311fe1e01e46a87 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Tue, 12 May 2026 22:30:13 +0100 Subject: [PATCH 10/36] Skeleton processing --- app/jobs/data_dump_job.rb | 29 +++++++++++++++++++++++++++++ db/scripts/data_dump.rb | 3 --- test/jobs/data_dump_job_test.rb | 7 +++++++ 3 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 app/jobs/data_dump_job.rb delete mode 100644 db/scripts/data_dump.rb create mode 100644 test/jobs/data_dump_job_test.rb diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb new file mode 100644 index 000000000..3f0016ba0 --- /dev/null +++ b/app/jobs/data_dump_job.rb @@ -0,0 +1,29 @@ +class DataDumpJob < ApplicationJob + queue_as :default + + def perform(*args) + permitted = YAML.safe_load(File.read(Rails.root.join('db/scripts/dump_permitted_columns.yml'))) + logger.info "Found #{permitted&.size} tables to dump." + + # Create backup database + # Mirror DB structure (mysqldump | mysql?) 
+ + permitted&.each do |table, data| + results = pull_table_data(table, data) + end + + # Dump all of the results into the backup DB + # Export backup DB to file + # Upload dump somewhere + # Create Dump record + # Delete backup DB + end + + def pull_table_data(table, data) + columns = data['columns'] + query = data['query'] + full_query = "SELECT #{columns.map { |c| "#{table}.#{c}" }.join(', ')} FROM #{table} #{query}" + logger.debug full_query + ApplicationRecord.connection.execute(full_query).to_a + end +end diff --git a/db/scripts/data_dump.rb b/db/scripts/data_dump.rb deleted file mode 100644 index 9500587bd..000000000 --- a/db/scripts/data_dump.rb +++ /dev/null @@ -1,3 +0,0 @@ -permitted = YAML.safe_load(File.read(Rails.root.join('db/scripts/dump_permitted_columns.yml'))) - - diff --git a/test/jobs/data_dump_job_test.rb b/test/jobs/data_dump_job_test.rb new file mode 100644 index 000000000..fb6b0030e --- /dev/null +++ b/test/jobs/data_dump_job_test.rb @@ -0,0 +1,7 @@ +require "test_helper" + +class DataDumpJobTest < ActiveJob::TestCase + # test "the truth" do + # assert true + # end +end From 47be0f21c3481918dfff5ccd55a79ebcaf47ee0c Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Wed, 13 May 2026 15:01:26 +0100 Subject: [PATCH 11/36] Successfully copying data to dump database --- app/jobs/data_dump_job.rb | 49 ++++++++++++++----- ...60513135320_add_defaults_for_data_dumps.rb | 12 +++++ db/schema.rb | 4 +- db/scripts/dump_permitted_columns.yml | 2 +- 4 files changed, 51 insertions(+), 16 deletions(-) create mode 100644 db/migrate/20260513135320_add_defaults_for_data_dumps.rb diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index 3f0016ba0..f6909e614 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -5,25 +5,48 @@ def perform(*args) permitted = YAML.safe_load(File.read(Rails.root.join('db/scripts/dump_permitted_columns.yml'))) logger.info "Found #{permitted&.size} tables to dump." 
- # Create backup database - # Mirror DB structure (mysqldump | mysql?) + begin + exec('SET FOREIGN_KEY_CHECKS = 0;') + exec('DROP DATABASE qpixel_dump;') + exec('CREATE DATABASE qpixel_dump;') - permitted&.each do |table, data| - results = pull_table_data(table, data) - end + @db_creds = Rails.configuration.database_configuration[Rails.env] + @username = @db_creds['username'] + @password = @db_creds['password'] + @database = @db_creds['database'] + + mysqldump_command = "mysqldump -u #{@username} -p#{@password} -d #{@database} --no-tablespaces" + mysql_command = "mysql -u #{@username} -p#{@password} -D qpixel_dump" + copy_success = system("#{mysqldump_command} | #{mysql_command}") + + unless copy_success + logger.fatal "Couldn't replicate database: nonzero exit code" + return + end + + permitted&.each do |table, data| + migrate_table(table, data) + end - # Dump all of the results into the backup DB - # Export backup DB to file - # Upload dump somewhere - # Create Dump record - # Delete backup DB + # Export backup DB to file + # Upload dump somewhere + # Create Dump record + # Delete backup DB + ensure + exec('SET FOREIGN_KEY_CHECKS = 1;') + end end - def pull_table_data(table, data) + def migrate_table(table, data) columns = data['columns'] query = data['query'] - full_query = "SELECT #{columns.map { |c| "#{table}.#{c}" }.join(', ')} FROM #{table} #{query}" + select = "(SELECT #{columns.map { |c| "`#{table}`.`#{c}`" }.join(', ')} FROM #{@database}.#{table} #{query})" + full_query = "INSERT INTO qpixel_dump.`#{table}` (#{columns.map { |c| "`#{c}`" }.join(', ')}) #{select}" logger.debug full_query - ApplicationRecord.connection.execute(full_query).to_a + exec(full_query) + end + + def exec(sql) + ApplicationRecord.connection.execute(sql) end end diff --git a/db/migrate/20260513135320_add_defaults_for_data_dumps.rb b/db/migrate/20260513135320_add_defaults_for_data_dumps.rb new file mode 100644 index 000000000..5b2014b40 --- /dev/null +++ 
b/db/migrate/20260513135320_add_defaults_for_data_dumps.rb @@ -0,0 +1,12 @@ +class AddDefaultsForDataDumps < ActiveRecord::Migration[7.2] + def change + # Add a bunch of default values for NOT NULL columns that don't already have them, so that data dumps don't break + # when the data in these columns isn't included. Only applies to NOT NULL columns that are NOT included in the dump, + # and do not already have a default value. + change_column_default :filters, :user_id, -1 + change_column_default :flags, :escalated, false + change_column_default :users, :sign_in_count, 0 + change_column_default :users, :failed_attempts, 0 + change_column_default :users, :deleted, false + end +end diff --git a/db/schema.rb b/db/schema.rb index 7c7abca20..2f37bf21c 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.2].define(version: 2026_03_22_151439) do +ActiveRecord::Schema[7.2].define(version: 2026_05_13_135320) do create_table "abilities", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.bigint "community_id" t.string "name" @@ -301,7 +301,7 @@ end create_table "filters", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t| - t.bigint "user_id", null: false + t.bigint "user_id", default: -1, null: false t.string "name", null: false t.float "min_score" t.float "max_score" diff --git a/db/scripts/dump_permitted_columns.yml b/db/scripts/dump_permitted_columns.yml index 14a2ce5c8..a425ce250 100644 --- a/db/scripts/dump_permitted_columns.yml +++ b/db/scripts/dump_permitted_columns.yml @@ -180,7 +180,7 @@ flags: - post_flag_type_id - post_type query: > - INNER JOIN post_flag_type pft ON pft.id = flags.post_flag_type_id + INNER JOIN post_flag_types pft ON pft.id = flags.post_flag_type_id WHERE pft.confidential = FALSE licenses: From 25aed982c0ca9fff04a318eea71cd695793f3880 Mon Sep 17 00:00:00 2001 From: 
ArtOfCode- Date: Wed, 13 May 2026 17:06:23 +0100 Subject: [PATCH 12/36] Export data and create Dump record --- app/jobs/data_dump_job.rb | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index f6909e614..5a7500581 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -24,15 +24,30 @@ def perform(*args) return end + logger.info 'Copied database structure.' + permitted&.each do |table, data| migrate_table(table, data) end - # Export backup DB to file - # Upload dump somewhere - # Create Dump record - # Delete backup DB + logger.info 'Migrated data.' + + file_path = Rails.root.join('tmp/qpixel_export.sql') + export_cmd = "mysqldump -u #{@username} -p#{@password} qpixel_dump --no-tablespaces > #{file_path}" + export_success = system(export_cmd) + + unless export_success + logger.fatal "Couldn't export database: nonzero exit code" + return + end + + logger.info 'Exported database.' 
+ + Dump.create(title: "Data Dump #{Time.now.strftime('%Y-%m-%d')}", + comment: "Automatically generated data dump as of #{Time.now.strftime('%Y-%m-%d %H:%M:%S')}.", + file: File.open(file_path)) ensure + exec('DROP DATABASE qpixel_dump;') exec('SET FOREIGN_KEY_CHECKS = 1;') end end From 46b24305f214214ef70482d59d4c2caeabd7daf0 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Wed, 13 May 2026 17:55:58 +0100 Subject: [PATCH 13/36] Dump management --- app/jobs/data_dump_job.rb | 10 ++++++---- app/models/dump.rb | 8 ++++++++ db/migrate/20260513160917_add_automatic_to_dumps.rb | 5 +++++ db/schema.rb | 3 ++- 4 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 db/migrate/20260513160917_add_automatic_to_dumps.rb diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index 5a7500581..7a70f846c 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -7,7 +7,7 @@ def perform(*args) begin exec('SET FOREIGN_KEY_CHECKS = 0;') - exec('DROP DATABASE qpixel_dump;') + exec('DROP DATABASE IF EXISTS qpixel_dump;') exec('CREATE DATABASE qpixel_dump;') @db_creds = Rails.configuration.database_configuration[Rails.env] @@ -43,9 +43,11 @@ def perform(*args) logger.info 'Exported database.' 
- Dump.create(title: "Data Dump #{Time.now.strftime('%Y-%m-%d')}", - comment: "Automatically generated data dump as of #{Time.now.strftime('%Y-%m-%d %H:%M:%S')}.", - file: File.open(file_path)) + dump = Dump.create(title: "Data Dump #{Time.now.strftime('%Y-%m-%d')}", + comment: "Automatically generated data dump as of #{Time.now.strftime('%Y-%m-%d %H:%M:%S')}.", + file: File.open(file_path), + automatic: true) + Dump.where(automatic: true).where.not(id: dump.id).destroy_all ensure exec('DROP DATABASE qpixel_dump;') exec('SET FOREIGN_KEY_CHECKS = 1;') diff --git a/app/models/dump.rb b/app/models/dump.rb index 10ac3960e..a36faaf6e 100644 --- a/app/models/dump.rb +++ b/app/models/dump.rb @@ -1,3 +1,11 @@ class Dump < ApplicationRecord has_one_attached :file + + before_destroy :delete_file + + private + + def delete_file + file.purge + end end diff --git a/db/migrate/20260513160917_add_automatic_to_dumps.rb b/db/migrate/20260513160917_add_automatic_to_dumps.rb new file mode 100644 index 000000000..c5b5f9b32 --- /dev/null +++ b/db/migrate/20260513160917_add_automatic_to_dumps.rb @@ -0,0 +1,5 @@ +class AddAutomaticToDumps < ActiveRecord::Migration[7.2] + def change + add_column :dumps, :automatic, :boolean, null: false, default: false + end +end diff --git a/db/schema.rb b/db/schema.rb index 2f37bf21c..9d61c6a72 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.2].define(version: 2026_05_13_135320) do +ActiveRecord::Schema[7.2].define(version: 2026_05_13_160917) do create_table "abilities", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.bigint "community_id" t.string "name" @@ -273,6 +273,7 @@ t.string "comment" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.boolean "automatic", default: false, null: false end create_table "email_logs", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t| From 9052b90c33036d760320ae2010c2b659605b14dc Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Wed, 13 May 2026 19:29:28 +0100 Subject: [PATCH 14/36] Rubocop --- app/jobs/data_dump_job.rb | 4 ++-- test/jobs/data_dump_job_test.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index 7a70f846c..b5ee408a3 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -1,8 +1,8 @@ class DataDumpJob < ApplicationJob queue_as :default - def perform(*args) - permitted = YAML.safe_load(File.read(Rails.root.join('db/scripts/dump_permitted_columns.yml'))) + def perform(*_args) + permitted = YAML.safe_load_file(Rails.root.join('db/scripts/dump_permitted_columns.yml')) logger.info "Found #{permitted&.size} tables to dump." 
begin diff --git a/test/jobs/data_dump_job_test.rb b/test/jobs/data_dump_job_test.rb index fb6b0030e..5c512ed26 100644 --- a/test/jobs/data_dump_job_test.rb +++ b/test/jobs/data_dump_job_test.rb @@ -1,4 +1,4 @@ -require "test_helper" +require 'test_helper' class DataDumpJobTest < ActiveJob::TestCase # test "the truth" do From a2be721fb3055f92ead816b8a5c4c42af83a45dd Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Wed, 13 May 2026 20:53:45 +0100 Subject: [PATCH 15/36] Add data page --- app/controllers/dumps_controller.rb | 6 ++ app/models/dump.rb | 3 + app/views/dumps/index.html.erb | 66 +++++++++++++++++++ app/views/dumps/index.htrml.erb | 0 config/routes.rb | 4 ++ config/schedule.rb | 4 ++ .../20260513185013_add_link_to_dumps.rb | 5 ++ db/schema.rb | 3 +- scripts/data_dump.rb | 1 + 9 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 app/views/dumps/index.html.erb delete mode 100644 app/views/dumps/index.htrml.erb create mode 100644 db/migrate/20260513185013_add_link_to_dumps.rb create mode 100644 scripts/data_dump.rb diff --git a/app/controllers/dumps_controller.rb b/app/controllers/dumps_controller.rb index 80f2b2669..41b461020 100644 --- a/app/controllers/dumps_controller.rb +++ b/app/controllers/dumps_controller.rb @@ -1,2 +1,8 @@ class DumpsController < ApplicationController + before_action :authenticate_user! + + def index + @latest = Dump.automatic.last + @others = Dump.manual + end end diff --git a/app/models/dump.rb b/app/models/dump.rb index a36faaf6e..8d60eee3b 100644 --- a/app/models/dump.rb +++ b/app/models/dump.rb @@ -3,6 +3,9 @@ class Dump < ApplicationRecord before_destroy :delete_file + scope :automatic, -> { where(automatic: true) } + scope :manual, -> { where(automatic: false) } + private def delete_file diff --git a/app/views/dumps/index.html.erb b/app/views/dumps/index.html.erb new file mode 100644 index 000000000..11c63d3af --- /dev/null +++ b/app/views/dumps/index.html.erb @@ -0,0 +1,66 @@ +

Data Dumps

+ +

+ Data from all <%= SiteSetting['NetworkName'] %> communities is made available in database format for download here. + This data is a weekly export of the entire database, minus any personally identifiable information, moderation data, + and some other sensitive information such as who cast votes. +

+ +
+

+ + Licensing +

+

+ This data is provided free of charge as part of our contribution to the commons. If you use post content, you must + still abide by the terms of the licenses set by the author of each post. +

+
+ +

Latest data dump

+ + + + + + + + + + + + + + + + + +
FilenameCreatedDownload
<%= @latest.title %><%= @latest.file.filename %><%= @latest.created_at.strftime('%Y-%m-%d') %><%= link_to 'Download', rails_blob_path(@latest.file, disposition: 'attachment') %>
+ +<% if @others.any? %> +

Other data dumps

+ + + + + + + + + + <% @others.each do |dump| %> + + + + + + <% end %> + +
CreatedDownload
<%= dump.title %><%= dump.created_at.strftime('%Y-%m-%d') %> + <% if dump.file.attached? %> + <%= link_to 'Download', rails_blob_path(dump.file, disposition: 'attachment') %> + <% elsif dump.link.present? %> + <%= link_to 'View', dump.link %> + <% end %> +
+<% end %> diff --git a/app/views/dumps/index.htrml.erb b/app/views/dumps/index.htrml.erb deleted file mode 100644 index e69de29bb..000000000 diff --git a/config/routes.rb b/config/routes.rb index fc4c5ca46..6f4220c4a 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -407,4 +407,8 @@ scope 'network' do root to: 'fake_community#communities', as: :fc_communities end + + scope 'data' do + root to: 'dumps#index', as: :data_dumps + end end diff --git a/config/schedule.rb b/config/schedule.rb index d8472d7b7..d8ffb32de 100644 --- a/config/schedule.rb +++ b/config/schedule.rb @@ -34,6 +34,10 @@ runner 'scripts/run_new_thread_followers_cleanup.rb' end +every 7.days, at: '04:00' do + runner 'scripts/data_dump.rb' +end + every 6.hours do runner 'scripts/recalc_abilities.rb' end diff --git a/db/migrate/20260513185013_add_link_to_dumps.rb b/db/migrate/20260513185013_add_link_to_dumps.rb new file mode 100644 index 000000000..d74d7c0b8 --- /dev/null +++ b/db/migrate/20260513185013_add_link_to_dumps.rb @@ -0,0 +1,5 @@ +class AddLinkToDumps < ActiveRecord::Migration[7.2] + def change + add_column :dumps, :link, :string + end +end diff --git a/db/schema.rb b/db/schema.rb index 9d61c6a72..e66a38914 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.2].define(version: 2026_05_13_160917) do +ActiveRecord::Schema[7.2].define(version: 2026_05_13_185013) do create_table "abilities", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.bigint "community_id" t.string "name" @@ -274,6 +274,7 @@ t.datetime "created_at", null: false t.datetime "updated_at", null: false t.boolean "automatic", default: false, null: false + t.string "link" end create_table "email_logs", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t| diff --git a/scripts/data_dump.rb b/scripts/data_dump.rb new file mode 100644 index 000000000..c37413ce7 --- /dev/null +++ b/scripts/data_dump.rb @@ -0,0 +1 @@ +DataDumpJob.perform_later From d15877c995ed9bfdc400fe948b0327ce96989abc Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Thu, 14 May 2026 14:02:51 +0100 Subject: [PATCH 16/36] Add link to footer --- app/views/layouts/_footer.html.erb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/views/layouts/_footer.html.erb b/app/views/layouts/_footer.html.erb index 53cbcd0f9..75eab99e1 100644 --- a/app/views/layouts/_footer.html.erb +++ b/app/views/layouts/_footer.html.erb @@ -7,6 +7,7 @@
  • <%= link_to 'About Us', '/policy/network-faq' %>
  • <%= link_to 'Privacy & Safety Center', safety_center_url %>
  • <%= link_to 'Report harmful content', new_complaint_path %>
  • +
  • <%= link_to 'Data dumps', data_dumps_path %>
  • From a5f8276673cb268ca460c58258c9da214469744c Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Thu, 14 May 2026 21:24:14 +0100 Subject: [PATCH 17/36] Test data dump job --- app/jobs/data_dump_job.rb | 6 ++- config/environments/test.rb | 2 +- test/jobs/data_dump_job_test.rb | 74 +++++++++++++++++++++++++++++++-- test/test_helper.rb | 3 ++ 4 files changed, 79 insertions(+), 6 deletions(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index b5ee408a3..a9e1602d3 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -1,7 +1,7 @@ class DataDumpJob < ApplicationJob queue_as :default - def perform(*_args) + def perform(drop_db_after: true) permitted = YAML.safe_load_file(Rails.root.join('db/scripts/dump_permitted_columns.yml')) logger.info "Found #{permitted&.size} tables to dump." @@ -49,8 +49,10 @@ def perform(*_args) automatic: true) Dump.where(automatic: true).where.not(id: dump.id).destroy_all ensure - exec('DROP DATABASE qpixel_dump;') exec('SET FOREIGN_KEY_CHECKS = 1;') + if drop_db_after + exec('DROP DATABASE qpixel_dump;') + end end end diff --git a/config/environments/test.rb b/config/environments/test.rb index feb02ce6a..6e7e78d81 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -13,7 +13,7 @@ config.cache_classes = false config.action_view.cache_template_loading = true - config.log_level = :info + config.log_level = ENV['LOG_LEVEL'] || :info # Do not eager load code on boot. This avoids loading your whole application # just for the purpose of running a single test. 
If you are using a tool that diff --git a/test/jobs/data_dump_job_test.rb b/test/jobs/data_dump_job_test.rb index 5c512ed26..5d4e6c438 100644 --- a/test/jobs/data_dump_job_test.rb +++ b/test/jobs/data_dump_job_test.rb @@ -1,7 +1,75 @@ require 'test_helper' class DataDumpJobTest < ActiveJob::TestCase - # test "the truth" do - # assert true - # end + setup :i_know_better_than_activerecord + teardown :i_dont_know_better_than_activerecord + + test 'job runs successfully' do + perform_enqueued_jobs do + DataDumpJob.perform_later + end + assert_performed_jobs 1 + end + + test 'no excluded data present in final dump DB' do + perform_enqueued_jobs do + DataDumpJob.perform_later(drop_db_after: false) + end + + conn = ApplicationRecord.connection + all_columns = conn.tables.to_h { |t| [t, conn.columns(t).map(&:name)] } + permitted = YAML.safe_load_file(Rails.root.join('db/scripts/dump_permitted_columns.yml')) + excluded_cols = all_columns.to_h do |t, cols| + [ + t, + cols.reject { |c| permitted.include?(t) && permitted[t]['columns'].include?(c) } + ] + end.reject { |_t, cols| cols.empty? } + + excluded_cols.each do |table, cols| + query = "SELECT #{cols.map { |c| "`#{c}`" }.join(', ')} FROM qpixel_dump.`#{table}`" + results = conn.execute(query).to_a + results.transpose.each.with_index do |col, i| + # EITHER all values in the column should be nil, OR all values in the column should be identical (which implies + # a default value was applied), for us to be happy that there is no true data in the column. + assert col.all?(&:nil?) || col.uniq.size <= 1, "Distinct non-null data present in column `#{table}`.`#{cols[i]}`" + end + end + end + + private + + ## + # This is definitely not a terrible idea that will come back to bite me later on. + # For context, this is necessary because the +create+ call in DataDumpJob is run by ActiveRecord in a transaction + # (well, ish - using savepoints). 
However, the DDL modifications that the data dump performs automatically release + # the savepoints, which then causes the RELEASE query to fail. I think. There's no convenient way to have AR not run + # the +create+ call in a transaction, so we have to monkeypatch it out. + def i_know_better_than_activerecord + ActiveRecord::ConnectionAdapters::Mysql2Adapter.class_eval do + def begin_db_transaction(*) + end + + def commit_db_transaction(*) + end + + def create_savepoint(*) + end + + def rollback_to_savepoint(*) + end + + def release_savepoint(*) + end + end + end + + ## + # Let's not let the terrible idea affect everything else too. + def i_dont_know_better_than_activerecord + $LOADED_FEATURES.delete_if { |f| f.include?('mysql2_adapter') } + unless require 'active_record/connection_adapters/mysql2_adapter' + Rails.logger.warn "Couldn't re-require mysql2 adapter. Everything after this point will not run in transactions." + end + end end diff --git a/test/test_helper.rb b/test/test_helper.rb index 7befbb484..fa985b0a9 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,3 +1,6 @@ +# Silence the irritating already initialized constant warning +$VERBOSE = nil + require 'simplecov' require 'simplecov_json_formatter' SimpleCov.formatter = SimpleCov::Formatter::JSONFormatter From 73aa0953f7a6a8e0be72f5870d29dbb24cbc22cc Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Thu, 14 May 2026 21:35:10 +0100 Subject: [PATCH 18/36] Forgot the host --- app/jobs/data_dump_job.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index a9e1602d3..94909b2fe 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -14,9 +14,10 @@ def perform(drop_db_after: true) @username = @db_creds['username'] @password = @db_creds['password'] @database = @db_creds['database'] + @host = @db_creds['host'] - mysqldump_command = "mysqldump -u #{@username} -p#{@password} -d #{@database} 
--no-tablespaces" - mysql_command = "mysql -u #{@username} -p#{@password} -D qpixel_dump" + mysqldump_command = "mysqldump -h #{@host} -u #{@username} -p#{@password} -d #{@database} --no-tablespaces" + mysql_command = "mysql -h #{@host} -u #{@username} -p#{@password} -D qpixel_dump" copy_success = system("#{mysqldump_command} | #{mysql_command}") unless copy_success @@ -33,7 +34,7 @@ def perform(drop_db_after: true) logger.info 'Migrated data.' file_path = Rails.root.join('tmp/qpixel_export.sql') - export_cmd = "mysqldump -u #{@username} -p#{@password} qpixel_dump --no-tablespaces > #{file_path}" + export_cmd = "mysqldump -h #{@host} -u #{@username} -p#{@password} qpixel_dump --no-tablespaces > #{file_path}" export_success = system(export_cmd) unless export_success From bf8b7a7993e4c3a09f86d009b7848ffae8e35c21 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Thu, 14 May 2026 21:48:58 +0100 Subject: [PATCH 19/36] Shooting in the dark now --- test/jobs/data_dump_job_test.rb | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/test/jobs/data_dump_job_test.rb b/test/jobs/data_dump_job_test.rb index 5d4e6c438..8b5b94a58 100644 --- a/test/jobs/data_dump_job_test.rb +++ b/test/jobs/data_dump_job_test.rb @@ -5,10 +5,9 @@ class DataDumpJobTest < ActiveJob::TestCase teardown :i_dont_know_better_than_activerecord test 'job runs successfully' do - perform_enqueued_jobs do + assert_performed_jobs 1 do DataDumpJob.perform_later end - assert_performed_jobs 1 end test 'no excluded data present in final dump DB' do @@ -46,20 +45,17 @@ class DataDumpJobTest < ActiveJob::TestCase # the savepoints, which then causes the RELEASE query to fail. I think. There's no convenient way to have AR not run # the +create+ call in a transaction, so we have to monkeypatch it out. 
def i_know_better_than_activerecord - ActiveRecord::ConnectionAdapters::Mysql2Adapter.class_eval do - def begin_db_transaction(*) - end - - def commit_db_transaction(*) - end - + ActiveRecord::ConnectionAdapters::AbstractAdapter.class_eval do def create_savepoint(*) + logger.warn 'create_savepoint ignored: this should only happen during data dump tests' end def rollback_to_savepoint(*) + logger.warn 'rollback_to_savepoint ignored: this should only happen during data dump tests' end def release_savepoint(*) + logger.warn 'release_savepoint ignored: this should only happen during data dump tests' end end end @@ -67,9 +63,8 @@ def release_savepoint(*) ## # Let's not let the terrible idea affect everything else too. def i_dont_know_better_than_activerecord - $LOADED_FEATURES.delete_if { |f| f.include?('mysql2_adapter') } - unless require 'active_record/connection_adapters/mysql2_adapter' - Rails.logger.warn "Couldn't re-require mysql2 adapter. Everything after this point will not run in transactions." 
+ ActiveRecord::ConnectionAdapters::AbstractAdapter.class_eval do + include ActiveRecord::ConnectionAdapters::Savepoints end end end From eb35540a32f221f91af203bd4b31fd624b0677f4 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Thu, 14 May 2026 21:54:49 +0100 Subject: [PATCH 20/36] Not sure where you're getting 2 jobs from, minitest --- test/jobs/data_dump_job_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/jobs/data_dump_job_test.rb b/test/jobs/data_dump_job_test.rb index 8b5b94a58..c33167da5 100644 --- a/test/jobs/data_dump_job_test.rb +++ b/test/jobs/data_dump_job_test.rb @@ -11,7 +11,7 @@ class DataDumpJobTest < ActiveJob::TestCase end test 'no excluded data present in final dump DB' do - perform_enqueued_jobs do + assert_performed_jobs 1 do DataDumpJob.perform_later(drop_db_after: false) end From 4724e15439191cd9034d18b7b811539043bec782 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Thu, 14 May 2026 21:57:23 +0100 Subject: [PATCH 21/36] Look as long as it's _doing_ it... 
--- test/jobs/data_dump_job_test.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/jobs/data_dump_job_test.rb b/test/jobs/data_dump_job_test.rb index c33167da5..53405df51 100644 --- a/test/jobs/data_dump_job_test.rb +++ b/test/jobs/data_dump_job_test.rb @@ -5,13 +5,13 @@ class DataDumpJobTest < ActiveJob::TestCase teardown :i_dont_know_better_than_activerecord test 'job runs successfully' do - assert_performed_jobs 1 do + perform_enqueued_jobs do DataDumpJob.perform_later end end test 'no excluded data present in final dump DB' do - assert_performed_jobs 1 do + perform_enqueued_jobs do DataDumpJob.perform_later(drop_db_after: false) end From 1740d3c9c985768ea560b14d106598baedc37e10 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Thu, 14 May 2026 22:10:38 +0100 Subject: [PATCH 22/36] If this worked the whole time I'm going to be annoyed --- test/jobs/data_dump_job_test.rb | 35 +-------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/test/jobs/data_dump_job_test.rb b/test/jobs/data_dump_job_test.rb index 53405df51..eff11cf60 100644 --- a/test/jobs/data_dump_job_test.rb +++ b/test/jobs/data_dump_job_test.rb @@ -1,8 +1,7 @@ require 'test_helper' class DataDumpJobTest < ActiveJob::TestCase - setup :i_know_better_than_activerecord - teardown :i_dont_know_better_than_activerecord + self.use_transactional_tests = false test 'job runs successfully' do perform_enqueued_jobs do @@ -35,36 +34,4 @@ class DataDumpJobTest < ActiveJob::TestCase end end end - - private - - ## - # This is definitely not a terrible idea that will come back to bite me later on. - # For context, this is necessary because the +create+ call in DataDumpJob is run by ActiveRecord in a transaction - # (well, ish - using savepoints). However, the DDL modifications that the data dump performs automatically release - # the savepoints, which then causes the RELEASE query to fail. I think. 
There's no convenient way to have AR not run - # the +create+ call in a transaction, so we have to monkeypatch it out. - def i_know_better_than_activerecord - ActiveRecord::ConnectionAdapters::AbstractAdapter.class_eval do - def create_savepoint(*) - logger.warn 'create_savepoint ignored: this should only happen during data dump tests' - end - - def rollback_to_savepoint(*) - logger.warn 'rollback_to_savepoint ignored: this should only happen during data dump tests' - end - - def release_savepoint(*) - logger.warn 'release_savepoint ignored: this should only happen during data dump tests' - end - end - end - - ## - # Let's not let the terrible idea affect everything else too. - def i_dont_know_better_than_activerecord - ActiveRecord::ConnectionAdapters::AbstractAdapter.class_eval do - include ActiveRecord::ConnectionAdapters::Savepoints - end - end end From ff95085dc445118728af588216f97abf1f4ede72 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Thu, 14 May 2026 22:15:22 +0100 Subject: [PATCH 23/36] Thanks rubocop --- test/jobs/data_dump_job_test.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/jobs/data_dump_job_test.rb b/test/jobs/data_dump_job_test.rb index eff11cf60..ea36b10c4 100644 --- a/test/jobs/data_dump_job_test.rb +++ b/test/jobs/data_dump_job_test.rb @@ -22,7 +22,8 @@ class DataDumpJobTest < ActiveJob::TestCase t, cols.reject { |c| permitted.include?(t) && permitted[t]['columns'].include?(c) } ] - end.reject { |_t, cols| cols.empty? } + end + excluded_cols = excluded_cols.reject { |_t, cols| cols.empty? 
} excluded_cols.each do |table, cols| query = "SELECT #{cols.map { |c| "`#{c}`" }.join(', ')} FROM qpixel_dump.`#{table}`" From dd1df8e559fdf178c1ceff924e7740a459104c83 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Fri, 15 May 2026 13:39:44 +0100 Subject: [PATCH 24/36] Access denied error handling --- app/jobs/data_dump_job.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index 94909b2fe..321c8704a 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -49,6 +49,8 @@ def perform(drop_db_after: true) file: File.open(file_path), automatic: true) Dump.where(automatic: true).where.not(id: dump.id).destroy_all + rescue ActiveRecord::ConnectionError + logger.fatal "Couldn't connect to database. Have you run `GRANT ALL ON qpixel_dump.*` for your DB user?" ensure exec('SET FOREIGN_KEY_CHECKS = 1;') if drop_db_after From f64ea306bee5fe8e45f2f1a0cc1446f2c5d79a12 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Fri, 15 May 2026 13:41:14 +0100 Subject: [PATCH 25/36] Handle no-dumps-yet case --- app/views/dumps/index.html.erb | 16 ++++++++++------ app/views/dumps/index.htrml.erb | 0 2 files changed, 10 insertions(+), 6 deletions(-) delete mode 100644 app/views/dumps/index.htrml.erb diff --git a/app/views/dumps/index.html.erb b/app/views/dumps/index.html.erb index 11c63d3af..bf73f07e1 100644 --- a/app/views/dumps/index.html.erb +++ b/app/views/dumps/index.html.erb @@ -18,24 +18,28 @@

    Latest data dump

    - - +<% if @latest.nil? %> +

    No data dumps available yet. Check back next week.

    +<% else %> +
    + - - + + - -
    Filename Created Download
    <%= @latest.title %> <%= @latest.file.filename %> <%= @latest.created_at.strftime('%Y-%m-%d') %> <%= link_to 'Download', rails_blob_path(@latest.file, disposition: 'attachment') %>
    + + +<% end %> <% if @others.any? %>

    Other data dumps

    diff --git a/app/views/dumps/index.htrml.erb b/app/views/dumps/index.htrml.erb deleted file mode 100644 index e69de29bb..000000000 From a7318dc021acc1dd5827eb719fc13babe809eb10 Mon Sep 17 00:00:00 2001 From: Oaphi Date: Mon, 18 May 2026 20:17:58 +0100 Subject: [PATCH 26/36] Apply suggested patch Signed-off-by: ArtOfCode- --- INSTALLATION.md | 1 + docker-compose.yml | 2 +- docker/Dockerfile.db | 3 +++ docker/db-entrypoint.sh | 5 +++++ docker/mysql-init.sql | 1 + 5 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 docker/db-entrypoint.sh diff --git a/INSTALLATION.md b/INSTALLATION.md index 703662612..db719de5a 100644 --- a/INSTALLATION.md +++ b/INSTALLATION.md @@ -131,6 +131,7 @@ the MySQL server with `sudo mysql -u root` and create a new database user for QP CREATE USER qpixel@localhost IDENTIFIED BY 'choose_a_password_here'; GRANT ALL ON qpixel_dev.* TO qpixel@localhost; GRANT ALL ON qpixel_test.* TO qpixel@localhost; +GRANT ALL ON qpixel_dump.* TO qpixel@localhost; GRANT ALL ON qpixel.* TO qpixel@localhost; ``` diff --git a/docker-compose.yml b/docker-compose.yml index 556c4b818..846523ef2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,10 +5,10 @@ services: context: "." 
dockerfile: docker/Dockerfile.db volumes: + - ./docker/db-entrypoint.sh:/docker/entrypoint.sh - ./docker/mysql:/var/lib/mysql env_file: - ${ENV_FILE_LOCATION} - command: mysqld --mysql-native-password=on --skip-mysqlx cap_add: - SYS_NICE healthcheck: diff --git a/docker/Dockerfile.db b/docker/Dockerfile.db index 41875ec29..29a6d7275 100644 --- a/docker/Dockerfile.db +++ b/docker/Dockerfile.db @@ -4,3 +4,6 @@ FROM mysql:8.4.2 # These commands will be run on init of the container COPY docker/mysql-init.sql /docker-entrypoint-initdb.d/mysql-init.sql + +ENTRYPOINT ["/bin/bash"] +CMD ["/docker/entrypoint.sh"] diff --git a/docker/db-entrypoint.sh b/docker/db-entrypoint.sh new file mode 100644 index 000000000..5c7885682 --- /dev/null +++ b/docker/db-entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +mysqld --init-file=/docker-entrypoint-initdb.d/mysql-init.sql \ + --mysql-native-password=on \ + --skip-mysqlx diff --git a/docker/mysql-init.sql b/docker/mysql-init.sql index 510059bb9..521fda2bf 100644 --- a/docker/mysql-init.sql +++ b/docker/mysql-init.sql @@ -3,4 +3,5 @@ if you change your environment file, you need to update database names here */ GRANT ALL ON qpixel_dev.* TO qpixel; GRANT ALL ON qpixel_test.* TO qpixel; +GRANT ALL ON qpixel_dump.* TO qpixel; GRANT ALL ON qpixel.* TO qpixel; From 10c7ec5548220c9afb60191d37ab0afa683015bc Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Mon, 18 May 2026 20:23:26 +0100 Subject: [PATCH 27/36] Correct error class --- app/jobs/data_dump_job.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index 321c8704a..84e6c696e 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -49,7 +49,7 @@ def perform(drop_db_after: true) file: File.open(file_path), automatic: true) Dump.where(automatic: true).where.not(id: dump.id).destroy_all - rescue ActiveRecord::ConnectionError + rescue ActiveRecord::ConnectionFailed logger.fatal "Couldn't connect to database. 
Have you run `GRANT ALL ON qpixel_dump.*` for your DB user?" ensure exec('SET FOREIGN_KEY_CHECKS = 1;') From 05bc65dfe7f923ac640a1b7315e168289a7509d7 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Mon, 18 May 2026 20:46:08 +0100 Subject: [PATCH 28/36] Add port and SSL state to commands --- app/jobs/data_dump_job.rb | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index 84e6c696e..b334d9a62 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -14,10 +14,15 @@ def perform(drop_db_after: true) @username = @db_creds['username'] @password = @db_creds['password'] @database = @db_creds['database'] + @port = @db_creds['port'] @host = @db_creds['host'] - mysqldump_command = "mysqldump -h #{@host} -u #{@username} -p#{@password} -d #{@database} --no-tablespaces" - mysql_command = "mysql -h #{@host} -u #{@username} -p#{@password} -D qpixel_dump" + mysqldump_command = build_command('mysqldump', '-h', @host, '-u', @username, "-p#{@password}", @database, + '--no-tablespaces', "--port=#{@port}", ssl_state) + mysql_command = build_command('mysql', '-h', @host, '-u', @username, "-p#{@password}", "--port=#{@port}", + '-D', 'qpixel_dump', ssl_state) + logger.debug 'Running system command:' + logger.debug "#{mysqldump_command} | #{mysql_command}" copy_success = system("#{mysqldump_command} | #{mysql_command}") unless copy_success @@ -34,7 +39,10 @@ def perform(drop_db_after: true) logger.info 'Migrated data.' 
file_path = Rails.root.join('tmp/qpixel_export.sql') - export_cmd = "mysqldump -h #{@host} -u #{@username} -p#{@password} qpixel_dump --no-tablespaces > #{file_path}" + export_cmd = build_command('mysqldump', '-h', @host, '-u', @username, "-p#{@password}", "--port=#{@port}", + 'qpixel_dump', '--no-tablespaces', ssl_state, '>', file_path) + logger.debug 'Running system command:' + logger.debug export_cmd export_success = system(export_cmd) unless export_success @@ -59,6 +67,8 @@ def perform(drop_db_after: true) end end + private + def migrate_table(table, data) columns = data['columns'] query = data['query'] @@ -71,4 +81,12 @@ def migrate_table(table, data) def exec(sql) ApplicationRecord.connection.execute(sql) end + + def build_command(cmd, *args) + "#{cmd} #{args.join(' ')}" + end + + def ssl_state + "--ssl=#{Rails.env.development? ? 'OFF' : 'ON'}" + end end From 3a2d349852f4d2d521689c728e53e5d522a7627d Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Mon, 18 May 2026 21:05:51 +0100 Subject: [PATCH 29/36] Apparently that option's deprecated --- app/jobs/data_dump_job.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index b334d9a62..d3ca16732 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -87,6 +87,6 @@ def build_command(cmd, *args) end def ssl_state - "--ssl=#{Rails.env.development? ? 'OFF' : 'ON'}" + "--ssl-mode=#{Rails.env.development? ? 
'DISABLED' : 'PREFERRED'}" end end From 5fe6017bc40882bbbaa33b3fa367b85f95129324 Mon Sep 17 00:00:00 2001 From: ArtOfCode- Date: Mon, 18 May 2026 22:11:24 +0100 Subject: [PATCH 30/36] Missed a flag e --- app/jobs/data_dump_job.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index d3ca16732..ba620933c 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -17,7 +17,7 @@ def perform(drop_db_after: true) @port = @db_creds['port'] @host = @db_creds['host'] - mysqldump_command = build_command('mysqldump', '-h', @host, '-u', @username, "-p#{@password}", @database, + mysqldump_command = build_command('mysqldump', '-h', @host, '-u', @username, "-p#{@password}", '-d', @database, '--no-tablespaces', "--port=#{@port}", ssl_state) mysql_command = build_command('mysql', '-h', @host, '-u', @username, "-p#{@password}", "--port=#{@port}", '-D', 'qpixel_dump', ssl_state) From 8fcb57833f739977fb00d3836e6da6f039c3ae6e Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 19 May 2026 01:48:22 +0300 Subject: [PATCH 31/36] Fix 'unknown variable' error MariaDB's client doesn't support the `--ssl-mode` option --- app/jobs/data_dump_job.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index ba620933c..b1af17713 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -83,10 +83,10 @@ def exec(sql) end def build_command(cmd, *args) - "#{cmd} #{args.join(' ')}" + "#{cmd} #{args.compact_blank.join(' ')}" end def ssl_state - "--ssl-mode=#{Rails.env.development? ? 'DISABLED' : 'PREFERRED'}" + return "--skip-ssl" if Rails.env.development? or Rails.env.test? 
end end From 7f7b8cb7a63bbae6544026c331d2b7f6a05afa24 Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 19 May 2026 01:53:56 +0300 Subject: [PATCH 32/36] Rubocop cleanup --- app/jobs/data_dump_job.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/jobs/data_dump_job.rb b/app/jobs/data_dump_job.rb index b1af17713..c425af480 100644 --- a/app/jobs/data_dump_job.rb +++ b/app/jobs/data_dump_job.rb @@ -87,6 +87,6 @@ def build_command(cmd, *args) end def ssl_state - return "--skip-ssl" if Rails.env.development? or Rails.env.test? + '--skip-ssl' if Rails.env.development? || Rails.env.test? end end From 443eca5f44d78cb775de98107bece890d1656d85 Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 19 May 2026 02:08:25 +0300 Subject: [PATCH 33/36] Update ci-cd's workflow MySQL image to sync with our Docker setup --- .github/workflows/ci-cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 9b9932067..6c0cf845e 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -62,7 +62,7 @@ jobs: services: mysql: &db-service - image: mysql:8.0 + image: mysql:8.4.2 # please keep the image in sync with Dockerfile.db env: MYSQL_ROOT_HOST: '%' MYSQL_ROOT_PASSWORD: 'root' From f309d1f002f02c23819bbe7177b3b257766435ff Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 19 May 2026 06:03:26 +0300 Subject: [PATCH 34/36] Remove outdated build comment from Dockerfile.db --- docker/Dockerfile.db | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/Dockerfile.db b/docker/Dockerfile.db index 29a6d7275..bc9a17213 100644 --- a/docker/Dockerfile.db +++ b/docker/Dockerfile.db @@ -1,7 +1,5 @@ FROM mysql:8.4.2 -# docker build -t qpixel_db -f docker/Dockerfile.db . 
- # These commands will be run on init of the container COPY docker/mysql-init.sql /docker-entrypoint-initdb.d/mysql-init.sql From 2c1d070fb24897cf73c5d331d453b4908c95142d Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 19 May 2026 06:09:38 +0300 Subject: [PATCH 35/36] Always report mysql & mysqldump versions during ci-cd runs --- .github/workflows/ci-cd.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 6c0cf845e..016f61078 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -81,6 +81,10 @@ jobs: run: | sudo apt-get -qq update sudo apt-get -yqq install libmariadb-dev libmagickwand-dev + - name: Report database client versions + run: | + mysql --version + mysqldump --version - name: Setup Ruby uses: ruby/setup-ruby@v1 with: From 7b8f9bf95cd43e101a222677120f69bbe6ee17ae Mon Sep 17 00:00:00 2001 From: Oleg Valter Date: Tue, 19 May 2026 06:23:15 +0300 Subject: [PATCH 36/36] Synchronize mysql & mysqldump client versions --- .github/workflows/ci-cd.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 016f61078..287409632 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -80,7 +80,9 @@ jobs: - name: Setup dependencies run: | sudo apt-get -qq update - sudo apt-get -yqq install libmariadb-dev libmagickwand-dev + sudo apt-get -yqq install libmariadb-dev \ + libmagickwand-dev \ + mariadb-client - name: Report database client versions run: | mysql --version