Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
58b2ea5
initial boilerplate for data dumps (mirror Metasmoke)
Oaphi Jan 20, 2026
88fd12d
dups should have a required title & optional comment
Oaphi Jan 20, 2026
5997829
initial boilerplate for data dump model
Oaphi Jan 20, 2026
8fca596
first data dump fixtures (fixing tests)
Oaphi Jan 20, 2026
efa5fa5
initial boilerplate for data dumps (mirror Metasmoke)
Oaphi Jan 20, 2026
540e07c
dups should have a required title & optional comment
Oaphi Jan 20, 2026
dec449a
initial boilerplate for data dump model
Oaphi Jan 20, 2026
bb6d5f8
first data dump fixtures (fixing tests)
Oaphi Jan 20, 2026
5aba30c
Create scaffolding for pulling data
ArtOfCode- May 12, 2026
96a1b4d
Skeleton processing
ArtOfCode- May 12, 2026
b35e5d1
Merge branch 'develop' into 0valt/1918/data-dump
ArtOfCode- May 12, 2026
47be0f2
Successfully copying data to dump database
ArtOfCode- May 13, 2026
25aed98
Export data and create Dump record
ArtOfCode- May 13, 2026
46b2430
Dump management
ArtOfCode- May 13, 2026
9052b90
Rubocop
ArtOfCode- May 13, 2026
a2be721
Add data page
ArtOfCode- May 13, 2026
2bece2e
Merge branch '0valt/1918/data-dump' into art/data-dumps
ArtOfCode- May 13, 2026
d15877c
Add link to footer
ArtOfCode- May 14, 2026
a5f8276
Test data dump job
ArtOfCode- May 14, 2026
73aa095
Forgot the host
ArtOfCode- May 14, 2026
bf8b7a7
Shooting in the dark now
ArtOfCode- May 14, 2026
eb35540
Not sure where you're getting 2 jobs from, minitest
ArtOfCode- May 14, 2026
4724e15
Look as long as it's _doing_ it...
ArtOfCode- May 14, 2026
1740d3c
If this worked the whole time I'm going to be annoyed
ArtOfCode- May 14, 2026
ff95085
Thanks rubocop
ArtOfCode- May 14, 2026
dd1df8e
Access denied error handling
ArtOfCode- May 15, 2026
f64ea30
Handle no-dumps-yet case
ArtOfCode- May 15, 2026
a7318dc
Apply suggested patch
Oaphi May 18, 2026
10c7ec5
Correct error class
ArtOfCode- May 18, 2026
05bc65d
Add port and SSL state to commands
ArtOfCode- May 18, 2026
3a2d349
Apparently that option's deprecated
ArtOfCode- May 18, 2026
5fe6017
Missed a flag
ArtOfCode- May 18, 2026
8fcb578
Fix 'unknown variable' error
Oaphi May 18, 2026
7f7b8cb
Rubocop cleanup
Oaphi May 18, 2026
443eca5
Update ci-cd's workflow MySQL image to sync with our Docker setup
Oaphi May 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:

services:
mysql: &db-service
image: mysql:8.0
image: mysql:8.4.2 # please keep the image in sync with Dockerfile.db
env:
MYSQL_ROOT_HOST: '%'
MYSQL_ROOT_PASSWORD: 'root'
Expand Down
1 change: 1 addition & 0 deletions INSTALLATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ the MySQL server with `sudo mysql -u root` and create a new database user for QP
CREATE USER qpixel@localhost IDENTIFIED BY 'choose_a_password_here';
GRANT ALL ON qpixel_dev.* TO qpixel@localhost;
GRANT ALL ON qpixel_test.* TO qpixel@localhost;
GRANT ALL ON qpixel_dump.* TO qpixel@localhost;
GRANT ALL ON qpixel.* TO qpixel@localhost;
```

Expand Down
8 changes: 8 additions & 0 deletions app/controllers/dumps_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Controller behind the data dumps listing page (`dumps#index`).
# Every action is preceded by the `authenticate_user!` filter.
class DumpsController < ApplicationController
  before_action :authenticate_user!

  # Assigns the two collections rendered by the index view:
  # * @latest - the last record in the `automatic` scope (nil when there is none)
  # * @others - every record in the `manual` scope
  def index
    @others = Dump.manual
    @latest = Dump.automatic.last
  end
end
2 changes: 2 additions & 0 deletions app/helpers/dumps_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# View helper namespace for the dumps views. Currently defines no helper methods.
module DumpsHelper
end
92 changes: 92 additions & 0 deletions app/jobs/data_dump_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
require 'shellwords'

# Builds the downloadable data dump.
#
# The job:
# 1. (re)creates a scratch database and copies the structure of the current environment's database into it,
# 2. copies only the tables/columns listed in db/scripts/dump_permitted_columns.yml into the scratch database,
# 3. exports the scratch database to tmp/qpixel_export.sql with mysqldump,
# 4. stores that file on a new automatic Dump record and destroys the previous automatic dumps.
class DataDumpJob < ApplicationJob
  queue_as :default

  # Name of the scratch database the filtered copy is assembled in.
  DUMP_DATABASE = 'qpixel_dump'.freeze

  # @param drop_db_after [Boolean] whether to drop the scratch database once the job finishes
  def perform(drop_db_after: true)
    permitted = YAML.safe_load_file(Rails.root.join('db/scripts/dump_permitted_columns.yml'))
    logger.info "Found #{permitted&.size} tables to dump."

    # Tracks whether this run got far enough to own the scratch database, so cleanup never tries to
    # drop a database we were not allowed to (or never managed to) create.
    dump_db_created = false

    begin
      exec('SET FOREIGN_KEY_CHECKS = 0;')
      exec("DROP DATABASE IF EXISTS #{DUMP_DATABASE};")
      exec("CREATE DATABASE #{DUMP_DATABASE};")
      dump_db_created = true

      load_credentials

      unless run_command(structure_copy_command, structure_copy_command(redact: true))
        logger.fatal "Couldn't replicate database: nonzero exit code"
        return
      end

      logger.info 'Copied database structure.'

      permitted&.each do |table, data|
        migrate_table(table, data)
      end

      logger.info 'Migrated data.'

      file_path = Rails.root.join('tmp/qpixel_export.sql')

      unless run_command(export_command(file_path), export_command(file_path, redact: true))
        logger.fatal "Couldn't export database: nonzero exit code"
        return
      end

      logger.info 'Exported database.'

      record_dump(file_path)
    rescue ActiveRecord::ConnectionFailed
      logger.fatal "Couldn't connect to database. Have you run `GRANT ALL ON qpixel_dump.*` for your DB user?"
    ensure
      exec('SET FOREIGN_KEY_CHECKS = 1;')
      # IF EXISTS + the created flag keep cleanup from raising out of `ensure` and masking the handled error.
      exec("DROP DATABASE IF EXISTS #{DUMP_DATABASE};") if drop_db_after && dump_db_created
    end
  end

  private

  # Reads the connection settings of the current environment's database into instance variables.
  def load_credentials
    @db_creds = Rails.configuration.database_configuration[Rails.env]
    @username = @db_creds['username']
    @password = @db_creds['password']
    @database = @db_creds['database']
    @port = @db_creds['port']
    @host = @db_creds['host']
  end

  # Connection options shared by every mysql/mysqldump invocation. Settings that are not configured are
  # left out entirely instead of producing dangling flags (`-h` without a value, `--port=`, or a bare `-p`
  # that would make the client prompt for a password).
  #
  # @param redact [Boolean] replace the password with a placeholder (for logging)
  # @return [Array<String, nil>] option list; may contain nil, which build_command drops
  def connection_args(redact: false)
    args = []
    args.push('-h', @host) if @host.present?
    args.push('-u', @username) if @username.present?
    args << "-p#{redact ? 'FILTERED' : @password}" if @password.present?
    args << "--port=#{@port}" if @port.present?
    args << ssl_state
    args
  end

  # Shell pipeline copying the structure (no rows: `-d`) of the source database into the scratch database.
  # NOTE(review): the shell reports only the exit status of the last command in a pipeline, so a mysqldump
  # failure is detected only if it also makes the mysql import fail.
  def structure_copy_command(redact: false)
    dump = build_command('mysqldump', *connection_args(redact: redact), '-d', '--no-tablespaces', @database)
    import = build_command('mysql', *connection_args(redact: redact), '-D', DUMP_DATABASE)
    "#{dump} | #{import}"
  end

  # mysqldump invocation writing the scratch database to file_path. Uses mysqldump's own --result-file
  # option rather than a shell redirect, so the path goes through the same escaping as every other argument.
  def export_command(file_path, redact: false)
    build_command('mysqldump', *connection_args(redact: redact), '--no-tablespaces',
                  "--result-file=#{file_path}", DUMP_DATABASE)
  end

  # Logs the password-free form of a command at debug level, then runs the real one.
  #
  # @param command [String] command line to execute
  # @param loggable [String] equivalent command line that is safe to write to the log
  # @return [Boolean, nil] result of Kernel#system
  def run_command(command, loggable)
    logger.debug 'Running system command:'
    logger.debug loggable
    system(command)
  end

  # Attaches the exported file to a new automatic Dump and, only once that record is saved, removes the
  # previous automatic dumps.
  def record_dump(file_path)
    now = Time.now

    # Block form closes the file handle; the record is created (and the attachment processed) inside the block.
    dump = File.open(file_path) do |file|
      Dump.create(title: "Data Dump #{now.strftime('%Y-%m-%d')}",
                  comment: "Automatically generated data dump as of #{now.strftime('%Y-%m-%d %H:%M:%S')}.",
                  file: file,
                  automatic: true)
    end

    # Without this guard a failed create leaves dump.id nil, and `where.not(id: nil)` would match -- and
    # destroy -- every existing automatic dump.
    unless dump.persisted?
      logger.fatal "Couldn't save dump record: #{dump.errors.full_messages.to_sentence}"
      return
    end

    Dump.automatic.where.not(id: dump.id).destroy_all
  end

  # Copies the permitted columns of one table from the source database into the scratch database.
  #
  # @param table [String] table name
  # @param data [Hash] entry from the permitted-columns file; 'columns' lists the columns to copy and
  #   'query' is appended verbatim after the FROM clause of the SELECT
  def migrate_table(table, data)
    columns = data['columns']
    query = data['query']
    source_columns = columns.map { |c| "`#{table}`.`#{c}`" }.join(', ')
    target_columns = columns.map { |c| "`#{c}`" }.join(', ')
    select = "(SELECT #{source_columns} FROM `#{@database}`.`#{table}` #{query})"
    full_query = "INSERT INTO #{DUMP_DATABASE}.`#{table}` (#{target_columns}) #{select}"
    logger.debug full_query
    exec(full_query)
  end

  # Runs raw SQL on the application's database connection.
  # Note: within this class this shadows Kernel#exec.
  def exec(sql)
    ApplicationRecord.connection.execute(sql)
  end

  # Joins a command and its arguments into one shell-safe string. Blank arguments are dropped and every
  # remaining word is shell-escaped, so credentials containing spaces or shell metacharacters are passed
  # through literally.
  def build_command(cmd, *args)
    Shellwords.join([cmd, *args.compact_blank])
  end

  # Extra client flag disabling SSL in development and test; nil in every other environment.
  def ssl_state
    '--skip-ssl' if Rails.env.development? || Rails.env.test?
  end
end
14 changes: 14 additions & 0 deletions app/models/dump.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# A data dump entry with one attached file.
# The boolean `automatic` column separates records into the `automatic` and `manual` scopes.
class Dump < ApplicationRecord
  has_one_attached :file

  # Purge the attached file before the record itself is destroyed.
  before_destroy { file.purge }

  scope :automatic, -> { where(automatic: true) }
  scope :manual, -> { where(automatic: false) }
end
70 changes: 70 additions & 0 deletions app/views/dumps/index.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<%# Data dumps listing. Uses @latest (an automatic Dump or nil) and @others (manual Dumps) set by DumpsController#index. %>
<h1>Data Dumps</h1>

<p>
  Data from all <%= SiteSetting['NetworkName'] %> communities is made available in database format for download here.
  This data is a weekly export of the entire database, minus any personally identifiable information, moderation data,
  and some other sensitive information such as who cast votes.
</p>

<div class="notice is-info has-color-tertiary-900">
  <p>
    <i class="fas fa-balance-scale"></i>
    <strong>Licensing</strong>
  </p>
  <p>
    This data is provided free of charge as part of our contribution to the commons. If you use post content, you must
    still abide by the terms of the licenses set by the author of each post.
  </p>
</div>

<h2>Latest data dump</h2>
<% if @latest.nil? %>
  <p>No data dumps available yet. Check back next week.</p>
<% else %>
  <table class="table is-with-hover is-full-width">
    <thead>
      <tr>
        <th></th>
        <th>Filename</th>
        <th>Created</th>
        <th>Download</th>
      </tr>
    </thead>
    <tbody>
      <tr>
        <td><%= @latest.title %></td>
        <td><%= @latest.file.filename %></td>
        <td><%= @latest.created_at.strftime('%Y-%m-%d') %></td>
        <td><%= link_to 'Download', rails_blob_path(@latest.file, disposition: 'attachment') %></td>
      </tr>
    </tbody>
  </table>
<% end %>

<% if @others.any? %>
  <h2>Other data dumps</h2>
  <table class="table is-with-hover is-full-width">
    <thead>
      <tr>
        <th></th>
        <th>Created</th>
        <th>Download</th>
      </tr>
    </thead>
    <tbody>
      <% @others.each do |dump| %>
        <tr>
          <td><%= dump.title %></td>
          <td><%= dump.created_at.strftime('%Y-%m-%d') %></td>
          <td>
            <%# Each row links to its own file (not @latest's); dumps without a file fall back to their link. %>
            <% if dump.file.attached? %>
              <%= link_to 'Download', rails_blob_path(dump.file, disposition: 'attachment') %>
            <% elsif dump.link.present? %>
              <%= link_to 'View', dump.link %>
            <% end %>
          </td>
        </tr>
      <% end %>
    </tbody>
  </table>
<% end %>
1 change: 1 addition & 0 deletions app/views/layouts/_footer.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
<li><%= link_to 'About Us', '/policy/network-faq' %></li>
<li><%= link_to 'Privacy & Safety Center', safety_center_url %></li>
<li><%= link_to 'Report harmful content', new_complaint_path %></li>
<li><%= link_to 'Data dumps', data_dumps_path %></li>
</ul>
</div>
<div class="grid--cell is-6 is-12-md is-12-sm">
Expand Down
2 changes: 1 addition & 1 deletion config/environments/test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
config.cache_classes = false
config.action_view.cache_template_loading = true

config.log_level = :info
config.log_level = ENV['LOG_LEVEL'] || :info

# Do not eager load code on boot. This avoids loading your whole application
# just for the purpose of running a single test. If you are using a tool that
Expand Down
4 changes: 4 additions & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -407,4 +407,8 @@
scope 'network' do
root to: 'fake_community#communities', as: :fc_communities
end

scope 'data' do
root to: 'dumps#index', as: :data_dumps
end
end
4 changes: 4 additions & 0 deletions config/schedule.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
runner 'scripts/run_new_thread_followers_cleanup.rb'
end

every 7.days, at: '04:00' do
runner 'scripts/data_dump.rb'
end

every 6.hours do
runner 'scripts/recalc_abilities.rb'
end
Expand Down
10 changes: 10 additions & 0 deletions db/migrate/20260120032240_create_dumps.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Creates the `dumps` table: a required title, an optional comment, and timestamps.
class CreateDumps < ActiveRecord::Migration[7.2]
  def change
    create_table :dumps do |table|
      table.string :title, null: false
      table.string :comment

      table.timestamps
    end
  end
end
12 changes: 12 additions & 0 deletions db/migrate/20260513135320_add_defaults_for_data_dumps.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Gives default values to NOT NULL columns that are left out of data dumps.
class AddDefaultsForDataDumps < ActiveRecord::Migration[7.2]
  def change
    # Add a bunch of default values for NOT NULL columns that don't already have them, so that data dumps don't break
    # when the data in these columns isn't included. Only applies to NOT NULL columns that are NOT included in the dump,
    # and do not already have a default value.
    #
    # The from:/to: form is what makes `change` reversible: with a bare value Rails cannot know the previous
    # default and raises ActiveRecord::IrreversibleMigration on rollback. `from: nil` reflects that none of
    # these columns had a default before this migration.
    change_column_default :filters, :user_id, from: nil, to: -1
    change_column_default :flags, :escalated, from: nil, to: false
    change_column_default :users, :sign_in_count, from: nil, to: 0
    change_column_default :users, :failed_attempts, from: nil, to: 0
    change_column_default :users, :deleted, from: nil, to: false
  end
end
5 changes: 5 additions & 0 deletions db/migrate/20260513160917_add_automatic_to_dumps.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds the NOT NULL boolean `automatic` column (default false) to `dumps`.
class AddAutomaticToDumps < ActiveRecord::Migration[7.2]
  def change
    add_column(:dumps, :automatic, :boolean, default: false, null: false)
  end
end
5 changes: 5 additions & 0 deletions db/migrate/20260513185013_add_link_to_dumps.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds the nullable string `link` column to `dumps`.
class AddLinkToDumps < ActiveRecord::Migration[7.2]
  def change
    add_column(:dumps, :link, :string)
  end
end
13 changes: 11 additions & 2 deletions db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[7.2].define(version: 2026_03_22_151439) do
ActiveRecord::Schema[7.2].define(version: 2026_05_13_185013) do
create_table "abilities", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
t.bigint "community_id"
t.string "name"
Expand Down Expand Up @@ -268,6 +268,15 @@
t.index ["user_id"], name: "index_complaints_on_user_id"
end

create_table "dumps", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
t.string "title", null: false
t.string "comment"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.boolean "automatic", default: false, null: false
t.string "link"
end

create_table "email_logs", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
t.string "log_type"
t.string "destination"
Expand All @@ -294,7 +303,7 @@
end

create_table "filters", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
t.bigint "user_id", null: false
t.bigint "user_id", default: -1, null: false
t.string "name", null: false
t.float "min_score"
t.float "max_score"
Expand Down
Loading
Loading