diff --git a/Gemfile b/Gemfile index 5ac084766..0b7cef728 100644 --- a/Gemfile +++ b/Gemfile @@ -2,6 +2,7 @@ source 'https://rubygems.org' ruby '>= 3.3', '< 3.5' # Essential gems: servers, adapters, Rails + Rails requirements +gem 'bcrypt', '~> 3.1' gem 'coffee-rails', '~> 5.0.0' gem 'connection_pool', '< 3.0' # mperham/connection_pool#210 gem 'counter_culture', '~> 3.2' @@ -38,6 +39,7 @@ gem 'groupdate', '~> 6.1' # View stuff. gem 'diffy', '~> 3.4' +gem 'ipaddress', '~> 0.8' gem 'jbuilder', '~> 2.11' gem 'rqrcode', '~> 2.1' gem 'will_paginate', '~> 3.3' diff --git a/Gemfile.lock b/Gemfile.lock index 46abd4a4d..e4aaccc71 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -181,6 +181,7 @@ GEM mini_magick (>= 4.9.5, < 6) ruby-vips (>= 2.0.17, < 3) io-console (0.8.2) + ipaddress (0.8.3) irb (1.17.0) pp (>= 0.6.0) prism (>= 1.3.0) @@ -269,6 +270,7 @@ GEM rack (>= 2.2.3) rack-protection orm_adapter (0.5.0) + ostruct (0.6.3) parallel (2.0.1) parser (3.3.11.1) ast (~> 2.4.1) @@ -393,6 +395,9 @@ GEM rubocop-rake (0.7.1) lint_roller (~> 1.1) rubocop (>= 1.72.1) + ruby-prof (2.0.4) + base64 + ostruct ruby-progressbar (1.13.0) ruby-saml (1.18.1) nokogiri (>= 1.13.10) @@ -497,6 +502,7 @@ DEPENDENCIES aws-sdk-s3 (~> 1.208) aws-sdk-sns (~> 1.72) aws-ses-v4 + bcrypt (~> 3.1) byebug (~> 11.1) capybara (~> 3.38) chartkick (~> 4.2) @@ -512,6 +518,7 @@ DEPENDENCIES flamegraph (~> 0.9) groupdate (~> 6.1) image_processing (~> 1.12) + ipaddress (~> 0.8) jbuilder (~> 2.11) jquery-rails (~> 4.5.0) letter_opener_web (~> 2.0) @@ -540,6 +547,7 @@ DEPENDENCIES rubocop (~> 1) rubocop-rails (~> 2.15) rubocop-rake (~> 0.7.1) + ruby-prof (~> 2.0) ruby-progressbar (~> 1.11) sass-rails (~> 6.0) selenium-webdriver (~> 4.7) diff --git a/app/assets/javascripts/moderator.js b/app/assets/javascripts/moderator.js index dad768bc4..3ad6e0b2c 100644 --- a/app/assets/javascripts/moderator.js +++ b/app/assets/javascripts/moderator.js @@ -47,4 +47,14 @@ $(() => { checkbox.checked = action === 'all'; }); }); + + 
QPixel.DOM.addSelectorListener('submit', '#pii-correlation-form', async (ev) => {
+    ev.preventDefault();
+
+    const targetId = document.querySelector('input[name="target_id"]').value;
+    const resp = await QPixel.fetch(`${location.pathname}?format=template&target_id=${encodeURIComponent(targetId)}`);
+    const html = await resp.text();
+
+    document.querySelector('.js-correlation-container').innerHTML = html;
+  });
 });
diff --git a/app/controllers/moderator_controller.rb b/app/controllers/moderator_controller.rb
index c2f649e7f..b2b00213d 100644
--- a/app/controllers/moderator_controller.rb
+++ b/app/controllers/moderator_controller.rb
@@ -89,6 +89,20 @@ def handle_spammy_users
     redirect_to mod_spammers_path
   end
 
+  ##
+  # Renders the PII correlation page for the user in +params[:id]+. The +template+ format additionally loads the
+  # user in +params[:target_id]+ to compare against; an unknown target raises +ActiveRecord::RecordNotFound+
+  # instead of handing +nil+ to the template, which dereferences the target.
+  def pii_correlation
+    @user = User.find(params[:id])
+    respond_to do |format|
+      format.html
+      format.template do
+        @target = User.find(params[:target_id])
+      end
+    end
+  end
+
   private
 
   def set_post
diff --git a/app/helpers/moderator_helper.rb b/app/helpers/moderator_helper.rb
index a472c57ea..ff47355cd 100644
--- a/app/helpers/moderator_helper.rb
+++ b/app/helpers/moderator_helper.rb
@@ -16,4 +16,25 @@ def text_bg(cls, content = nil, **opts, &block)
       tag.span content, class: ["has-background-color-#{cls}", opts[:class]].join(' ')
     end
   end
+
+  ##
+  # Split an IP address into an array of hashed octets (well, hexadecets for IPv6).
+  # @param ip [String] The IP address to process.
+  # @param salting_user [User] A user from which to source a salt for hashing. For hashes to be directly comparable, you
+  #   must use the same user for each IP address you wish to compare, even if sourced from a different user.
+  # @return [Array(String, Array<String>)] The IP address family ('IPv4', 'IPv6', or '' if +ip+ cannot be parsed)
+  #   and an array of hashed octets, which is empty if +ip+ cannot be parsed.
+  def split_hash_ip(ip, salting_user)
+    begin
+      addr = IPAddress.parse(ip)
+    rescue ArgumentError
+      return ['', []]
+    end
+
+    # NOTE(review): BCrypt::Password.new raises BCrypt::Errors::InvalidHash for a string that is not a bcrypt hash.
+    # Confirm that every user passed as +salting_user+ has a bcrypt +encrypted_password+.
+    family, parts = addr.ipv6? ? ['IPv6', addr.hexs] : ['IPv4', addr.octets]
+    salt = BCrypt::Password.new(salting_user.encrypted_password).salt
+    [family, parts.map { |part| Digest::SHA2.hexdigest(salt + part.to_s) }]
+  end
 end
diff --git a/app/jobs/update_user_stats_job.rb b/app/jobs/update_user_stats_job.rb
new file mode 100644
index 000000000..22146c4c9
--- /dev/null
+++ b/app/jobs/update_user_stats_job.rb
@@ -0,0 +1,17 @@
+class UpdateUserStatsJob < ApplicationJob
+  queue_as :default
+
+  ##
+  # Counts users per email domain and stores the counts in the +user_email_domains+ cache hash.
+  # Users whose email is missing or has no domain part, and domains containing 'localhost', are left out.
+  def perform(*)
+    domains = User.pluck(:email)
+                  .filter_map { |email| email&.split('@')&.at(1) }
+                  .reject { |domain| domain.include?('localhost') }
+                  .tally
+    # HMSET needs at least one field/value pair, so there is nothing to write for an empty result.
+    return if domains.empty?
+
+    Rails.cache.hmset('user_email_domains', domains)
+  end
+end
diff --git a/app/views/moderator/pii_correlation.html.erb b/app/views/moderator/pii_correlation.html.erb
new file mode 100644
index 000000000..49f3c6dc3
--- /dev/null
+++ b/app/views/moderator/pii_correlation.html.erb
@@ -0,0 +1,25 @@
+

PII correlation for <%= user_link @user %>

+

+ This tool displays correlations between personally-identifying information across user accounts. This includes email + address and IP addresses. Please select a user against whom to compare. +

+ + +
+
+
+ + +
+
+ +
+
+
+ +

+ Information is hashed to protect users' privacy. + Text highlighted in red indicates matching data. +

+ +
diff --git a/app/views/moderator/pii_correlation.template.erb b/app/views/moderator/pii_correlation.template.erb new file mode 100644 index 000000000..16c8b4ee0 --- /dev/null +++ b/app/views/moderator/pii_correlation.template.erb @@ -0,0 +1,126 @@ +

Comparing with <%= user_link @target %>

+ +

Email address

+ + + + + + + + + + + + <% user_handle = Digest::SHA2.hexdigest(@user.email.split('@')[0]) %> + <% target_handle = Digest::SHA2.hexdigest(@target.email.split('@')[0]) %> + + + + + + + + + +
HandleDomain
<%= @user.rtl_safe_username %> + + <%= user_handle[0..7] %> + + + <%= Digest::SHA2.hexdigest(@user.email.split('@')[1])[0..7] %>
+ <% domain_users = Rails.cache.hget 'user_email_domains', @user.email.split('@')[1] %> + <% if domain_users.nil? %> + (unknown number of users) + <% else %> + Used by <%= pluralize(domain_users, 'user') %> + <% end %> +
<%= @target.rtl_safe_username %> + + <%= target_handle[0..7] %> + + + <%= Digest::SHA2.hexdigest(@target.email.split('@')[1])[0..7] %>
+ <% domain_users = Rails.cache.hget 'user_email_domains', @target.email.split('@')[1] %> + <% if domain_users.nil? %> + (unknown number of users) + <% else %> + Used by <%= pluralize(domain_users, 'user') %> + <% end %> +

+ +

IP addresses

+ +<% user_current_family, user_current_ip = split_hash_ip(@user.current_sign_in_ip, @user) %> +<% user_joiner = user_current_family == 'IPv4' ? '.' : ':' %> +<% target_current_family, target_current_ip = split_hash_ip(@target.current_sign_in_ip, @user) %> +<% target_joiner = target_current_family == 'IPv4' ? '.' : ':' %> +Current sign-in
+ + + + + + + + + + + + + + + + + + + + +
UserFamilyAddress
<%= @user.rtl_safe_username %><%= user_current_family %> + <% user_current_ip.map.with_index do |p, i| %> + + <%= p[0..3] %><%= user_joiner if i < user_current_ip.length - 1 %> + <% end %> +
<%= @target.rtl_safe_username %><%= target_current_family %> + <% target_current_ip.map.with_index do |p, i| %> + + <%= p[0..3] %><%= target_joiner if i < target_current_ip.length - 1 %> + <% end %> +

+ +<% user_last_family, user_last_ip = split_hash_ip(@user.last_sign_in_ip, @user) %> +<% user_joiner = user_last_family == 'IPv4' ? '.' : ':' %> +<% target_last_family, target_last_ip = split_hash_ip(@target.last_sign_in_ip, @user) %> +<% target_joiner = target_last_family == 'IPv4' ? '.' : ':' %> +Last sign-in
+ + + + + + + + + + + + + + + + + + + + +
UserFamilyAddress
<%= @user.rtl_safe_username %><%= user_last_family %> + <% user_last_ip.map.with_index do |p, i| %> + + <%= p[0..3] %><%= user_joiner if i < user_last_ip.length - 1 %> + <% end %> +
<%= @target.rtl_safe_username %><%= target_last_family %> + <% target_last_ip.map.with_index do |p, i| %> + + <%= p[0..3] %><%= target_joiner if i < target_last_ip.length - 1 %> + <% end %> +
\ No newline at end of file diff --git a/config/initializers/mime_types.rb b/config/initializers/mime_types.rb index dc1899682..eeb9ef7d1 100644 --- a/config/initializers/mime_types.rb +++ b/config/initializers/mime_types.rb @@ -2,3 +2,5 @@ # Add new mime types for use in respond_to blocks: # Mime::Type.register "text/richtext", :rtf + +Mime::Type.register('text/html+template', :template) diff --git a/config/routes.rb b/config/routes.rb index fc4c5ca46..9523121e5 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -226,6 +226,7 @@ get '/:id/mod/activity-log', to: 'users#full_log', as: :full_user_log post '/:id/hellban', to: 'admin#hellban', as: :hellban_user get '/:id/avatar/:size', to: 'users#avatar', as: :user_auto_avatar + get '/:id/mod/pii', to: 'moderator#pii_correlation', as: :mod_pii_correlation end post 'notifications/:id/read', to: 'notifications#read', as: :read_notifications diff --git a/config/schedule.rb b/config/schedule.rb index d8472d7b7..81c156ae5 100644 --- a/config/schedule.rb +++ b/config/schedule.rb @@ -34,6 +34,10 @@ runner 'scripts/run_new_thread_followers_cleanup.rb' end +every 7.days, at: '05:00' do + runner 'scripts/run_user_stats.rb' +end + every 6.hours do runner 'scripts/recalc_abilities.rb' end diff --git a/lib/namespaced_env_cache.rb b/lib/namespaced_env_cache.rb index 571ca1d98..68473e58f 100644 --- a/lib/namespaced_env_cache.rb +++ b/lib/namespaced_env_cache.rb @@ -1,5 +1,10 @@ +require_relative 'redis_cache_hash_methods' + module QPixel class NamespacedEnvCache < ActiveSupport::Cache::Store + include RedisCacheHashMethods + attr_reader :underlying + def initialize(underlying) @underlying = underlying @getters = {} @@ -144,6 +149,14 @@ def self.supports_cache_versioning? 
true
     end
 
+    def method_missing(name, *args, **opts, &block)
+      @underlying.public_send(name, *args, **opts, &block)
+    end
+
+    def respond_to_missing?(name, include_private = false)
+      @underlying.respond_to?(name) || super
+    end
+
     private
 
     # Raises an error if a given collection is not cacheable
diff --git a/lib/redis_cache_hash_methods.rb b/lib/redis_cache_hash_methods.rb
new file mode 100644
index 000000000..d080aca01
--- /dev/null
+++ b/lib/redis_cache_hash_methods.rb
@@ -0,0 +1,98 @@
+module QPixel
+  ##
+  # Module for inclusion in a +RedisCacheStore+ or +NamespacedEnvCache+ to add Redis hash methods.
+  # If the including class is neither a +RedisCacheStore+ nor a +NamespacedEnvCache+ backed by one, these methods
+  # will still be added but will raise +NotImplementedError+ at runtime. The cache implementation must be using
+  # +ConnectionPool+.
+  #
+  # NOTE(review): hash names are passed to Redis exactly as given; nothing in this module applies a cache
+  # namespace to them. Confirm that is intended if the including cache namespaces its other keys.
+  module RedisCacheHashMethods
+    ##
+    # Set a hash value.
+    # @param hash_key [String] The name of the hash
+    # @param key [String] The key within the hash
+    # @param value [String] The key's value
+    # @return [Integer] The number of keys that were added to the hash
+    def hset(hash_key, key, value)
+      with_redis { |rd| rd.hset(hash_key, key, value) }
+    end
+
+    ##
+    # Set multiple hash values.
+    # @param hash_key [String] The name of the hash
+    # @param data [Hash] Keys and values to add to the hash. An empty hash is a no-op.
+    # @return [String] 'OK'
+    def hmset(hash_key, data)
+      with_redis do |rd|
+        # Redis rejects HMSET without any field/value pair, so an empty hash is reported as a successful no-op.
+        # Hash#flatten goes one level deep only, so an Array value is not spread into extra arguments.
+        data.empty? ? 'OK' : rd.hmset(hash_key, *data.flatten)
+      end
+    end
+
+    ##
+    # Get a hash value.
+    # @param hash_key [String] The name of the hash
+    # @param key [String] The key within the hash
+    # @return [String, nil] The key's value, or nil if the key is not set
+    def hget(hash_key, key)
+      with_redis { |rd| rd.hget(hash_key, key) }
+    end
+
+    ##
+    # Get multiple hash values.
+    # @param hash_key [String] The name of the hash
+    # @param keys [Array<String>] Keys within the hash to retrieve
+    # @return [Hash] Keys and values from the hash
+    def hmget(hash_key, *keys)
+      with_redis do |rd|
+        keys.zip(rd.hmget(hash_key, *keys)).to_h
+      end
+    end
+
+    ##
+    # Get all hash values.
+    # @param hash_key [String] The name of the hash
+    # @return [Hash] The hash's values
+    def hgetall(hash_key)
+      with_redis { |rd| rd.hgetall(hash_key) }
+    end
+
+    ##
+    # Delete hash values, or the entire hash.
+    # @param hash_key [String] The name of the hash
+    # @param keys [Array<String>] Keys within the hash to delete.
+    #   If none are provided, the entire hash is deleted.
+    # @return [Integer] The number of keys removed from the hash (or of hashes deleted, when no keys are given)
+    def hdel(hash_key, *keys)
+      with_redis do |rd|
+        keys.empty? ? rd.del(hash_key) : rd.hdel(hash_key, *keys)
+      end
+    end
+
+    private
+
+    ##
+    # Checks a connection out of the connection pool and passes it to the block to run Redis commands.
+    # @yieldparam rd The connection checked out of the pool
+    # @raise [NotImplementedError] if this cache is not backed by Redis
+    def with_redis(&block)
+      reject_unless_redis_cache!
+      redis_cache = is_a?(ActiveSupport::Cache::RedisCacheStore) ? self : underlying
+      redis_cache.redis.with(&block)
+    end
+
+    ##
+    # Raises an error unless the current class is a +RedisCacheStore+, or is a +NamespacedEnvCache+ that is backed by
+    # a +RedisCacheStore+.
+    # @raise [NotImplementedError]
+    def reject_unless_redis_cache!
+      redis_cache_store = ActiveSupport::Cache::RedisCacheStore
+      return if is_a?(redis_cache_store)
+      return if respond_to?(:underlying) && underlying.is_a?(redis_cache_store)
+
+      raise NotImplementedError, 'This cache implementation is not backed by Redis and cannot use Hash methods.'
+    end
+  end
+end
diff --git a/scripts/run_user_stats.rb b/scripts/run_user_stats.rb
new file mode 100644
index 000000000..6fc6cc0d1
--- /dev/null
+++ b/scripts/run_user_stats.rb
@@ -0,0 +1 @@
+UpdateUserStatsJob.perform_later
diff --git a/test/jobs/update_user_stats_job_test.rb b/test/jobs/update_user_stats_job_test.rb
new file mode 100644
index 000000000..767ff2a3f
--- /dev/null
+++ b/test/jobs/update_user_stats_job_test.rb
@@ -0,0 +1,7 @@
+require 'test_helper'
+
+class UpdateUserStatsJobTest < ActiveJob::TestCase
+  # test "the truth" do
+  #   assert true
+  # end
+end
diff --git a/test/lib/redis_cache_hash_methods_test.rb b/test/lib/redis_cache_hash_methods_test.rb
new file mode 100644
index 000000000..3e99f29bd
--- /dev/null
+++ b/test/lib/redis_cache_hash_methods_test.rb
@@ -0,0 +1,25 @@
+require 'test_helper'
+
+class RedisCacheHashMethodsTest < ActiveSupport::TestCase
+  test 'redis cache hash methods' do
+    assert_nil Rails.cache.read('test_hash')
+    assert_equal 'OK', Rails.cache.hmset('test_hash', {})
+    assert_equal 1, Rails.cache.hset('test_hash', 'key', 'value')
+    assert_equal 'value', Rails.cache.hget('test_hash', 'key')
+    assert_equal 'OK', Rails.cache.hmset('test_hash', { 'key2' => 'value2', 'key3' => 'value3' })
+    assert_equal({ 'key' => 'value', 'key2' => 'value2', 'key3' => 'value3' },
+                 Rails.cache.hmget('test_hash', 'key', 'key2', 'key3'))
+    assert_equal({ 'key' => 'value', 'key2' => 'value2', 'key3' => 'value3' },
+                 Rails.cache.hgetall('test_hash'))
+    assert_equal 1, Rails.cache.hdel('test_hash', 'key3')
+    assert_equal 1, Rails.cache.hdel('test_hash')
+    assert_nil Rails.cache.read('test_hash')
+  end
+
+  test 'rejects calls on unimplemented caches' do
+    cache = QPixel::NamespacedEnvCache.new(ActiveSupport::Cache::MemoryStore.new)
+    assert_raises NotImplementedError do
+      cache.hgetall('test_hash')
+    end
+  end
+end