From 078a74ace5ca83c9899129b8bb7c62948920aeb9 Mon Sep 17 00:00:00 2001 From: Martin Voigt Date: Tue, 25 Nov 2025 12:46:55 +0100 Subject: [PATCH 01/20] Add OAI-PMH Ingestors --- lib/ingestors/ingestor_factory.rb | 4 +- lib/ingestors/oai_pmh_bioschemas_ingestor.rb | 157 +++++++++++++++++++ lib/ingestors/oai_pmh_ingestor.rb | 104 ++++++++++++ 3 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 lib/ingestors/oai_pmh_bioschemas_ingestor.rb create mode 100644 lib/ingestors/oai_pmh_ingestor.rb diff --git a/lib/ingestors/ingestor_factory.rb b/lib/ingestors/ingestor_factory.rb index 67e818d02..913deacb6 100644 --- a/lib/ingestors/ingestor_factory.rb +++ b/lib/ingestors/ingestor_factory.rb @@ -10,6 +10,8 @@ def self.ingestors Ingestors::MaterialCsvIngestor, Ingestors::TessEventIngestor, Ingestors::ZenodoIngestor, + Ingestors::OaiPmhIngestor, + Ingestors::OaiPmhBioschemasIngestor ] + taxila_ingestors + llm_ingestors end @@ -35,7 +37,7 @@ def self.taxila_ingestors Ingestors::Taxila::OsciIngestor, Ingestors::Taxila::DccIngestor, Ingestors::Taxila::SenseIngestor, - Ingestors::Taxila::VuMaterialIngestor, + Ingestors::Taxila::VuMaterialIngestor ] end diff --git a/lib/ingestors/oai_pmh_bioschemas_ingestor.rb b/lib/ingestors/oai_pmh_bioschemas_ingestor.rb new file mode 100644 index 000000000..925b982f6 --- /dev/null +++ b/lib/ingestors/oai_pmh_bioschemas_ingestor.rb @@ -0,0 +1,157 @@ +require 'open-uri' +require 'tess_rdf_extractors' + +module Ingestors + class OaiPmhBioschemasIngestor < Ingestor + DUMMY_URL = 'https://example.com' + + attr_reader :verbose + + def self.config + { + key: 'oai_pmh_bioschemas', + title: 'OAI-PMH (Bioschemas RDF)', + user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0', + mail: Rails.configuration.tess['contact_email'] + } + end + + def read(source_url) + provider_events = [] + provider_materials = [] + totals = Hash.new(0) + + client = OAI::Client.new source_url, headers: { 'From' => config[:mail] } + client.list_records(metadata_prefix: 'rdf').full.each do |record| + metadata_tag = Nokogiri::XML(record.metadata.to_s) + bioschemas_xml = metadata_tag.at_xpath('metadata/rdf:RDF', 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')&.to_s + output = read_content(bioschemas_xml) + next unless output + + provider_events += output[:resources][:events] + provider_materials += output[:resources][:materials] + output[:totals].each do |key, value| + totals[key] += value + end + end + + if totals.keys.any? + bioschemas_summary = "Bioschemas summary:\n" + totals.each do |type, count| + bioschemas_summary << "\n - #{type}: #{count}" + end + @messages << bioschemas_summary + end + + deduplicate(provider_events).each do |event_params| + add_event(event_params) + end + + deduplicate(provider_materials).each do |material_params| + add_material(material_params) + end + end + + def read_content(content) + output = { + resources: { + events: [], + materials: [] + }, + totals: Hash.new(0) + } + + return output unless content + + begin + events = Tess::Rdf::EventExtractor.new(content, :rdfxml).extract do |p| + convert_params(p) + end + courses = Tess::Rdf::CourseExtractor.new(content, :rdfxml).extract do |p| + convert_params(p) + end + course_instances = Tess::Rdf::CourseInstanceExtractor.new(content, :rdfxml).extract do |p| + convert_params(p) + end + learning_resources = Tess::Rdf::LearningResourceExtractor.new(content, :rdfxml).extract do |p| + convert_params(p) + end + output[:totals]['Events'] += events.count + output[:totals]['Courses'] += courses.count + output[:totals]['CourseInstances'] += course_instances.count + output[:totals]['LearningResources'] += learning_resources.count + + deduplicate(events + courses + course_instances).each do |event| + output[:resources][:events] << event + end + + deduplicate(learning_resources).each do |material| + output[:resources][:materials] << material + end + rescue StandardError => e + Rails.logger.error("#{e.class}: #{e.message}") + Rails.logger.error(e.backtrace.join("\n")) if e.backtrace&.any? + error = 'An error' + comment = nil + if e.is_a?(RDF::ReaderError) + error = 'A parsing error' + comment = 'Please check your page contains valid RDF/XML.' + end + message = "#{error} occurred while reading the source." + message << " #{comment}" if comment + @messages << message + end + + output + end + + # ---- This is copied unchanged from bioschemas_ingestor.rb and needs to be refactored. ---- + + # If duplicate resources have been extracted, prefer ones with the most metadata. + def deduplicate(resources) + return [] unless resources.any? + + puts "De-duplicating #{resources.count} resources" if verbose + hash = {} + scores = {} + resources.each do |resource| + resource_url = resource[:url] + puts " Considering: #{resource_url}" if verbose + if hash[resource_url] + score = metadata_score(resource) + # Replace the resource if this resource has a higher metadata score + puts " Duplicate! Comparing #{score} vs. #{scores[resource_url]}" if verbose + if score > scores[resource_url] + puts ' Replacing resource' if verbose + hash[resource_url] = resource + scores[resource_url] = score + end + else + puts ' Not present, adding' if verbose + hash[resource_url] = resource + scores[resource_url] = metadata_score(resource) + end + end + + puts "#{hash.values.count} resources after de-duplication" if verbose + + hash.values + end + + # Score based on number of metadata fields available + def metadata_score(resource) + score = 0 + resource.each_value do |value| + score += 1 unless value.nil? || value == {} || value == [] || (value.is_a?(String) && value.strip == '') + end + + score + end + + def convert_params(params) + params[:description] = convert_description(params[:description]) if params.key?(:description) + + params + end + end +end diff --git a/lib/ingestors/oai_pmh_ingestor.rb b/lib/ingestors/oai_pmh_ingestor.rb new file mode 100644 index 000000000..c8edab5a9 --- /dev/null +++ b/lib/ingestors/oai_pmh_ingestor.rb @@ -0,0 +1,104 @@ +require 'open-uri' +require 'tess_rdf_extractors' +require 'oai' +require 'nokogiri' + +module Ingestors + class OaiPmhIngestor < Ingestor + DUMMY_URL = 'https://example.com' + + attr_reader :verbose + + def self.config + { + key: 'oai_pmh', + title: 'OAI-PMH', + user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0', + mail: Rails.configuration.tess['contact_email'] + } + end + + def ns + { + 'dc' => 'http://purl.org/dc/elements/1.1/', + 'oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/' + } + end + + def read(source_url) + client = OAI::Client.new source_url, headers: { 'From' => config[:mail] } + count = 0 + client.list_records.full.each do |record| + read_dublin_core(record.metadata.to_s) + count += 1 + end + @messages << "found #{count} records" + end + + def read_dublin_core(xml_string) + doc = Nokogiri::XML(xml_string) + + types = doc.xpath('//dc:type', ns).map(&:text) + if types.include?('http://purl.org/dc/dcmitype/Event') + read_dublin_core_event(doc) + else + read_dublin_core_material(doc) + end + end + + def read_dublin_core_material(xml_doc) + material = OpenStruct.new + material.title = xml_doc.at_xpath('//dc:title', ns)&.text + material.description = convert_description(xml_doc.at_xpath('//dc:description', ns)&.text) + material.authors = xml_doc.xpath('//dc:creator', ns).map(&:text) + material.contributors = xml_doc.xpath('//dc:contributor', ns).map(&:text) + material.licence = xml_doc.at_xpath('//dc:rights', ns)&.text + + dates = xml_doc.xpath('//dc:date', ns).map(&:text) + parsed_dates = dates.map do |d| + Date.parse(d) + rescue StandardError + nil + end.compact + material.date_created = parsed_dates.first + material.date_modified = parsed_dates.last if parsed_dates.size > 1 + + identifiers = xml_doc.xpath('//dc:identifier', ns).map(&:text) + doi = identifiers.find { |id| id.start_with?('10.') || id.include?('doi.org') } + if doi + doi = doi&.sub(%r{https?://doi\.org/}, '') + material.doi = "https://doi.org/#{doi}" + end + material.url = identifiers.find { |id| id.start_with?('http://', 'https://') } + + material.keywords = xml_doc.xpath('//dc:subject', ns).map(&:text) + material.resource_type = xml_doc.xpath('//dc:type', ns).map(&:text) + material.contact = xml_doc.at_xpath('//dc:publisher', ns)&.text + + add_material material + end + + def read_dublin_core_event(_xml_doc) + event = OpenStruct.new + + event.title = doc.at_xpath('//dc:title', ns)&.text + event.description = convert_description(doc.at_xpath('//dc:description', ns)&.text) + event.url = doc.xpath('//dc:identifier', ns).map(&:text).find { |id| id.start_with?('http://', 'https://') } + event.contact = doc.at_xpath('//dc:publisher', ns)&.text + event.organizer = doc.at_xpath('//dc:creator', ns)&.text + event.keywords = doc.xpath('//dc:subject', ns).map(&:text) + event.event_types = types + + dates = doc.xpath('//dc:date', ns).map(&:text) + parsed_dates = dates.map do |d| + Date.parse(d) + rescue StandardError + nil + end.compact + event.start = parsed_dates.first + event.end = parsed_dates.last + + add_event event + end + end +end From 154629d78cffe9c71f26d9c464de03621067736e Mon Sep 17 00:00:00 2001 From: Martin Voigt Date: Mon, 8 Dec 2025 22:28:28 +0100 Subject: [PATCH 02/20] #1192 Initial exchange filter implementation - support filtering by keywords --- app/controllers/sources_controller.rb | 21 ++++++++++--------- app/views/sources/_form.html.erb | 2 ++ app/workers/source_test_worker.rb | 6 ++++-- ...208203629_add_keyword_filter_to_sources.rb | 5 +++++ db/schema.rb | 3 ++- lib/ingestors/ingestor.rb | 14 ++++++++----- 6 files changed, 33 insertions(+), 18 deletions(-) create mode 100644 db/migrate/20251208203629_add_keyword_filter_to_sources.rb diff --git a/app/controllers/sources_controller.rb b/app/controllers/sources_controller.rb index 33142098e..9112032f9 100644 --- a/app/controllers/sources_controller.rb +++ b/app/controllers/sources_controller.rb @@ -1,6 +1,6 @@ class SourcesController < ApplicationController - before_action :set_source, except: [:index, :new, :create, :check_exists] - before_action :set_content_provider, except: [:index, :check_exists] + before_action :set_source, except: %i[index new create check_exists] + before_action :set_content_provider, except: %i[index check_exists] before_action :set_breadcrumbs include SearchableIndex @@ -64,8 +64,8 @@ def check_exists end else respond_to do |format| - format.html { render :nothing => true, :status => 200, :content_type => 'text/html' } - format.json { render json: {}, :status => 200, :content_type => 'application/json' } + format.html { render nothing: true, status: 200, content_type: 'text/html' } + format.json { render json: {}, status: 200, content_type: 'application/json' } end end end @@ -93,8 +93,10 @@ def destroy @source.create_activity :destroy, owner: current_user @source.destroy respond_to do |format| - format.html { redirect_to policy(Source).index? ? sources_path : content_provider_path(@content_provider), - notice: 'Source was successfully deleted.' } + format.html do + redirect_to policy(Source).index? ? sources_path : content_provider_path(@content_provider), + notice: 'Source was successfully deleted.' + end format.json { head :no_content } end end @@ -105,7 +107,7 @@ def test @source.test_job_id = job_id respond_to do |format| - format.json { render json: { id: job_id }} + format.json { render json: { id: job_id } } end end @@ -150,7 +152,7 @@ def set_content_provider # Never trust parameters from the scary internet, only allow the white list through. def source_params - permitted = [:url, :method, :token, :default_language, :enabled] + permitted = %i[url method token default_language enabled keyword_filter] permitted << :approval_status if policy(Source).approve? permitted << :content_provider_id if policy(Source).index? @@ -164,7 +166,7 @@ def set_breadcrumbs add_breadcrumb 'Sources', content_provider_path(@content_provider, anchor: 'sources') if params[:id] - add_breadcrumb @source.title, content_provider_source_path(@content_provider, @source) if (@source && !@source.new_record?) + add_breadcrumb @source.title, content_provider_source_path(@content_provider, @source) if @source && !@source.new_record? add_breadcrumb action_name.capitalize.humanize, request.path unless action_name == 'show' elsif action_name != 'index' add_breadcrumb action_name.capitalize.humanize, request.path @@ -173,5 +175,4 @@ def set_breadcrumbs super end end - end diff --git a/app/views/sources/_form.html.erb b/app/views/sources/_form.html.erb index 6a791a0d8..a495874b6 100644 --- a/app/views/sources/_form.html.erb +++ b/app/views/sources/_form.html.erb @@ -36,6 +36,8 @@ include_blank: false %> <% end %> + <%= f.input :keyword_filter, hint: 'Comma separated list of keywords that must be present. Leave empty to disable filter.', label: 'Keyword Filter' %> +
<%= f.submit(class: 'btn btn-primary') %> <%= link_to t('.cancel', default: t("helpers.links.cancel")), diff --git a/app/workers/source_test_worker.rb b/app/workers/source_test_worker.rb index 153f6e1e8..ef5638613 100644 --- a/app/workers/source_test_worker.rb +++ b/app/workers/source_test_worker.rb @@ -9,6 +9,7 @@ class SourceTestWorker def perform(source_id) source = Source.find_by_id(source_id) return unless source + results = { events: [], materials: [], @@ -20,13 +21,14 @@ def perform(source_id) ingestor = Ingestors::IngestorFactory.get_ingestor(source.method) ingestor.token = source.token ingestor.read(source.url) + ingestor.filter(source) results = { events: ingestor.events, materials: ingestor.materials, - messages: ingestor.messages, + messages: ingestor.messages } rescue StandardError => e - results[:messages] << "Ingestor encountered an unexpected error" + results[:messages] << 'Ingestor encountered an unexpected error' exception = e end diff --git a/db/migrate/20251208203629_add_keyword_filter_to_sources.rb b/db/migrate/20251208203629_add_keyword_filter_to_sources.rb new file mode 100644 index 000000000..185df36a5 --- /dev/null +++ b/db/migrate/20251208203629_add_keyword_filter_to_sources.rb @@ -0,0 +1,5 @@ +class AddKeywordFilterToSources < ActiveRecord::Migration[7.2] + def change + add_column :sources, :keyword_filter, :string + end +end diff --git a/db/schema.rb b/db/schema.rb index a6b3aa4f3..f98ef5a65 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.2].define(version: 2025_03_25_151745) do +ActiveRecord::Schema[7.2].define(version: 2025_12_08_203629) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -480,6 +480,7 @@ t.integer "approval_status" t.datetime "updated_at" t.string "default_language" + t.string "keyword_filter" t.index ["content_provider_id"], name: "index_sources_on_content_provider_id" t.index ["user_id"], name: "index_sources_on_user_id" end diff --git a/lib/ingestors/ingestor.rb b/lib/ingestors/ingestor.rb index 8f043f135..1b586deef 100644 --- a/lib/ingestors/ingestor.rb +++ b/lib/ingestors/ingestor.rb @@ -33,10 +33,16 @@ def read(_url) raise NotImplementedError end + def filter(source) + keyword_filter = source.keyword_filter.split(',').map(&:strip) + @materials = @materials.select { |m| (Array(m.keywords) & keyword_filter).any? } + end + def write(user, provider, source: nil) - write_resources(Event, @events, user, provider, source: source) + filter(source) if source + write_resources(Event, @events, user, provider, source:) @messages << stats_summary(:events) - write_resources(Material, @materials, user, provider, source: source) + write_resources(Material, @materials, user, provider, source:) @messages << stats_summary(:materials) end @@ -144,9 +150,7 @@ def write_resources(type, resources, user, provider, source: nil) type.new(resource.to_h) end - if resource.has_attribute?(:language) && resource.new_record? - resource.language ||= source&.default_language - end + resource.language ||= source&.default_language if resource.has_attribute?(:language) && resource.new_record? resource = set_resource_defaults(resource) if resource.valid? From ff960b1c4761860d192e89213493e9c3db75fc56 Mon Sep 17 00:00:00 2001 From: Martin Voigt Date: Tue, 16 Dec 2025 09:41:14 +0100 Subject: [PATCH 03/20] #1192 More complex implementation for exchange filters --- app/assets/javascripts/source_filters.js | 21 +++++++ app/assets/stylesheets/sources.scss | 9 +++ app/controllers/sources_controller.rb | 12 +++- app/models/source.rb | 46 +++++++------- app/models/source_filter.rb | 13 ++++ app/views/sources/_form.html.erb | 60 ++++++++++++++++++- .../sources/_source_filter_form.html.erb | 11 ++++ .../20251209112056_create_source_filters.rb | 12 ++++ db/schema.rb | 13 +++- lib/ingestors/ingestor.rb | 2 +- test/fixtures/source_filters.yml | 13 ++++ test/models/source_filter_test.rb | 7 +++ 12 files changed, 191 insertions(+), 28 deletions(-) create mode 100644 app/assets/javascripts/source_filters.js create mode 100644 app/assets/stylesheets/sources.scss create mode 100644 app/models/source_filter.rb create mode 100644 app/views/sources/_source_filter_form.html.erb create mode 100644 db/migrate/20251209112056_create_source_filters.rb create mode 100644 test/fixtures/source_filters.yml create mode 100644 test/models/source_filter_test.rb diff --git a/app/assets/javascripts/source_filters.js b/app/assets/javascripts/source_filters.js new file mode 100644 index 000000000..e6b07f76a --- /dev/null +++ b/app/assets/javascripts/source_filters.js @@ -0,0 +1,21 @@ +var SourceFilters = { + add: function () { + var newForm = $($('#source-filter-template').clone().html()); + newForm.appendTo('#source-filter-list'); + + return false; // Stop form being submitted + }, + + // This is just cosmetic. The actual removal is done by rails, + // by virtue of the hidden checkbox being checked when the label is clicked. + delete: function () { + $(this).parents('.source-filter-form').fadeOut(); + } +}; + +document.addEventListener("turbolinks:load", function() { + $('#source-filters') + .on('click', '#add-source-filter-btn', SourceFilters.add) + .on('click', '#add-source-filter-btn-label', SourceFilters.add) + .on('change', '.delete-source-filter-btn input.destroy-attribute', SourceFilters.delete); +}); diff --git a/app/assets/stylesheets/sources.scss b/app/assets/stylesheets/sources.scss new file mode 100644 index 000000000..3d96aba25 --- /dev/null +++ b/app/assets/stylesheets/sources.scss @@ -0,0 +1,9 @@ +.source-filter-form { + display: flex; + gap: 1em; + margin-bottom: 4px; + + label { + margin-right: 4px; + } +} \ No newline at end of file diff --git a/app/controllers/sources_controller.rb b/app/controllers/sources_controller.rb index 9112032f9..26c7ee439 100644 --- a/app/controllers/sources_controller.rb +++ b/app/controllers/sources_controller.rb @@ -30,6 +30,10 @@ def new # GET /sources/1/edit def edit + puts '------------------' + puts 'b' + @source.source_filters.all.each { |sf| puts sf.filter_value } + puts '------------------' authorize @source end @@ -74,6 +78,10 @@ def check_exists # PATCH/PUT /sources/1.json def update authorize @source + + puts '------------ HI FROM CREATE ------------' + puts source_params + respond_to do |format| if @source.update(source_params) @source.create_activity(:update, owner: current_user) if @source.log_update_activity? @@ -152,11 +160,11 @@ def set_content_provider # Never trust parameters from the scary internet, only allow the white list through. def source_params - permitted = %i[url method token default_language enabled keyword_filter] + permitted = %i[url method token default_language enabled keyword_filter source_filters] permitted << :approval_status if policy(Source).approve? permitted << :content_provider_id if policy(Source).index? - params.require(:source).permit(permitted) + params.require(:source).permit(permitted, source_filters_attributes: %i[id filter_mode filter_by filter_value _destroy]) end def set_breadcrumbs diff --git a/app/models/source.rb b/app/models/source.rb index 381e5da0a..c7e938f11 100644 --- a/app/models/source.rb +++ b/app/models/source.rb @@ -16,12 +16,13 @@ class Source < ApplicationRecord belongs_to :user belongs_to :content_provider + has_many :source_filters, dependent: :destroy validates :url, :method, presence: true validates :url, url: true validates :approval_status, inclusion: { in: APPROVAL_STATUS.values } - validates :method, inclusion: { in: -> (_) { TeSS::Config.user_ingestion_methods } }, - unless: -> { User.current_user&.is_admin? || User.current_user&.has_role?(:scraper_user) } + validates :method, inclusion: { in: ->(_) { TeSS::Config.user_ingestion_methods } }, + unless: -> { User.current_user&.is_admin? || User.current_user&.has_role?(:scraper_user) } validates :default_language, controlled_vocabulary: { dictionary: 'LanguageDictionary', allow_blank: true } validate :check_method @@ -30,6 +31,8 @@ class Source < ApplicationRecord before_update :log_approval_status_change before_update :reset_approval_status + accepts_nested_attributes_for :source_filters, allow_destroy: true + if TeSS::Config.solr_enabled # :nocov: searchable do @@ -44,7 +47,7 @@ class Source < ApplicationRecord ingestor_title end string :content_provider do - self.content_provider.try(:title) + content_provider.try(:title) end string :node, multiple: true do associated_nodes.pluck(:name) @@ -72,18 +75,16 @@ def ingestor_class end def self.facet_fields - field_list = %w( content_provider node method enabled approval_status ) + field_list = %w[content_provider node method enabled approval_status] field_list.delete('node') unless TeSS::Config.feature['nodes'] field_list end def self.check_exists(source_params) - given_source = self.new(source_params) + given_source = new(source_params) source = nil - if given_source.url.present? - source = self.find_by_url(given_source.url) - end + source = find_by_url(given_source.url) if given_source.url.present? source end @@ -137,27 +138,26 @@ def self.approval_required? private def set_approval_status - if self.class.approval_required? - self.approval_status = :not_approved - else - self.approval_status = :approved - end + self.approval_status = if self.class.approval_required? + :not_approved + else + :approved + end end def reset_approval_status - if self.class.approval_required? - if method_changed? || url_changed? - self.approval_status = :not_approved - end - end + return unless self.class.approval_required? + return unless method_changed? || url_changed? + + self.approval_status = :not_approved end def log_approval_status_change - if approval_status_changed? - old = (APPROVAL_STATUS[approval_status_before_last_save.to_i] || APPROVAL_STATUS[0]).to_s - new = approval_status.to_s - create_activity(:approval_status_changed, owner: User.current_user, parameters: { old: old, new: new }) - end + return unless approval_status_changed? + + old = (APPROVAL_STATUS[approval_status_before_last_save.to_i] || APPROVAL_STATUS[0]).to_s + new = approval_status.to_s + create_activity(:approval_status_changed, owner: User.current_user, parameters: { old:, new: }) end def loggable_changes diff --git a/app/models/source_filter.rb b/app/models/source_filter.rb new file mode 100644 index 000000000..433ca314f --- /dev/null +++ b/app/models/source_filter.rb @@ -0,0 +1,13 @@ +class SourceFilter < ApplicationRecord + belongs_to :source + + enum filter_by: { + target_audience: 'target_audience', + keyword: 'keyword' + } + + enum filter_mode: { + allow: 'allow', + block: 'block' + } +end diff --git a/app/views/sources/_form.html.erb b/app/views/sources/_form.html.erb index a495874b6..ec6f39943 100644 --- a/app/views/sources/_form.html.erb +++ b/app/views/sources/_form.html.erb @@ -1,4 +1,5 @@ -<%= simple_form_for (@content_provider ? [@content_provider, @source] : @source), html: { class: 'source', multipart: true } do |f| %> +<%#<%= simple_form_for (@content_provider ? [@content_provider, @source] : @source), html: { class: 'source', multipart: true } do |f| %> +<%= simple_form_for (@source), html: { class: 'source', multipart: true } do |f| %> <%= render partial: 'common/error_summary', locals: { resource: @source } %> <% unless @content_provider %> @@ -38,10 +39,67 @@ <%= f.input :keyword_filter, hint: 'Comma separated list of keywords that must be present. Leave empty to disable filter.', label: 'Keyword Filter' %> +

Filters

+ + <% f.object.source_filters.load %> + <%= 'hi' %> + <%= f.object.class %> + <%= f.object.persisted? %> + <%= f.object.nested_attributes_options %> + <%= f.object.association(:source_filters).reader.class %> + <%= f.object.source_filters.loaded? %>
+ <%= f.object.source_filters.any? %>
+ <%= f.object.source_filters.map(&:marked_for_destruction?) %>
+ <%= @source.object_id %>
+ <%= f.object.object_id %>
+
+ + + <% f.object.source_filters.each_with_index do |sf, index| %> + <%= fields_for "source[source_filters_attributes][#{index}]", sf do |ff| %> + a source exists + <%= ff.text_field :filter_value %> + <% end %> + <% end %> + +
+ <% f.simple_fields_for :source_filters do |ff| %> + a source exists + <%= render partial: 'source_filter_form', locals: { f: ff } %> + <% end %> +
+ +
+ <% f.fields_for :source_filters, @source.source_filters do |ff| %> + a source exists + <%= render partial: 'source_filter_form', locals: { f: ff } %> + <% end %> +
+ + <% f.object.source_filters.each do |filter| %> +
+ <%= f.simple_fields_for :source_filters, filter, child_index: filter.id do |ff| %> + <%= render partial: 'source_filter_form', locals: { f: ff } %> + <% end %> +
+ <% end %> + + + + + Add filter condition +
+
<%= f.submit(class: 'btn btn-primary') %> <%= link_to t('.cancel', default: t("helpers.links.cancel")), sources_path, class: 'btn btn-default' %>
+ + <% end %> diff --git a/app/views/sources/_source_filter_form.html.erb b/app/views/sources/_source_filter_form.html.erb new file mode 100644 index 000000000..4c0d48df3 --- /dev/null +++ b/app/views/sources/_source_filter_form.html.erb @@ -0,0 +1,11 @@ +
+ <%= f.input :filter_by, collection: SourceFilter.filter_bies.keys.map { |t| [t.humanize, t] }, include_blank: false %> + + <%= f.input :filter_value %> + + <%= f.input :filter_mode, collection: SourceFilter.filter_modes.keys.map { |m| [m.humanize, m] }, include_blank: false %> + + <%= f.input :_destroy, as: :hidden %> + + +
diff --git a/db/migrate/20251209112056_create_source_filters.rb b/db/migrate/20251209112056_create_source_filters.rb new file mode 100644 index 000000000..9b3ac7ebe --- /dev/null +++ b/db/migrate/20251209112056_create_source_filters.rb @@ -0,0 +1,12 @@ +class CreateSourceFilters < ActiveRecord::Migration[7.2] + def change + create_table :source_filters do |t| + t.references :source, null: false, foreign_key: true + t.string :filter_mode + t.string :filter_by + t.string :filter_value + + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index f98ef5a65..e987b4215 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.2].define(version: 2025_12_08_203629) do +ActiveRecord::Schema[7.2].define(version: 2025_12_09_112056) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -462,6 +462,16 @@ t.string "title" end + create_table "source_filters", force: :cascade do |t| + t.bigint "source_id", null: false + t.string "filter_mode" + t.string "filter_by" + t.string "filter_value" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["source_id"], name: "index_source_filters_on_source_id" + end + create_table "sources", force: :cascade do |t| t.bigint "content_provider_id" t.bigint "user_id" @@ -665,6 +675,7 @@ add_foreign_key "materials", "users" add_foreign_key "node_links", "nodes" add_foreign_key "nodes", "users" + add_foreign_key "source_filters", "sources" add_foreign_key "sources", "content_providers" add_foreign_key "sources", "users" add_foreign_key "space_roles", "spaces" diff --git a/lib/ingestors/ingestor.rb b/lib/ingestors/ingestor.rb index 1b586deef..8789977fd 100644 --- a/lib/ingestors/ingestor.rb +++ b/lib/ingestors/ingestor.rb @@ -35,7 +35,7 @@ def read(_url) def filter(source) keyword_filter = source.keyword_filter.split(',').map(&:strip) - @materials = @materials.select { |m| (Array(m.keywords) & keyword_filter).any? } + @materials = @materials.select { |m| (Array(m.keywords) & keyword_filter).any? } unless keyword_filter.empty? end def write(user, provider, source: nil) diff --git a/test/fixtures/source_filters.yml b/test/fixtures/source_filters.yml new file mode 100644 index 000000000..539f04b1a --- /dev/null +++ b/test/fixtures/source_filters.yml @@ -0,0 +1,13 @@ +# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html + +one: + source: one + filter_mode: MyString + filter_by: MyString + filter_value: MyString + +two: + source: two + filter_mode: MyString + filter_by: MyString + filter_value: MyString diff --git a/test/models/source_filter_test.rb b/test/models/source_filter_test.rb new file mode 100644 index 000000000..f91741206 --- /dev/null +++ b/test/models/source_filter_test.rb @@ -0,0 +1,7 @@ +require "test_helper" + +class SourceFilterTest < ActiveSupport::TestCase + # test "the truth" do + # assert true + # end +end From 0e7441313b658a3be5aeaacd9f0731bcb91793e7 Mon Sep 17 00:00:00 2001 From: Martin Voigt Date: Tue, 16 Dec 2025 12:46:16 +0100 Subject: [PATCH 04/20] #1192 Clean up exchange filter implementation --- app/assets/javascripts/source_filters.js | 6 ++- app/controllers/sources_controller.rb | 7 --- app/views/sources/_form.html.erb | 47 +++---------------- .../sources/_source_filter_form.html.erb | 2 +- 4 files changed, 11 insertions(+), 51 deletions(-) diff --git a/app/assets/javascripts/source_filters.js b/app/assets/javascripts/source_filters.js index e6b07f76a..a1b583b9e 100644 --- a/app/assets/javascripts/source_filters.js +++ b/app/assets/javascripts/source_filters.js @@ -1,7 +1,9 @@ var SourceFilters = { add: function () { - var newForm = $($('#source-filter-template').clone().html()); - newForm.appendTo('#source-filter-list'); + var existing_list_item_ids = $("#source-filter-list").children("div").map(function(i, c) { return $(c).data("id-in-filter-list")}); + var new_id = Math.max.apply(null, existing_list_item_ids) + 1; + var new_form = $($('#source-filter-template').clone().html().replace(/REPLACE_ME/g, new_id)); + new_form.appendTo('#source-filter-list'); return false; // Stop form being submitted }, diff --git a/app/controllers/sources_controller.rb b/app/controllers/sources_controller.rb index 26c7ee439..7b75ca831 100644 --- a/app/controllers/sources_controller.rb +++ b/app/controllers/sources_controller.rb @@ -30,10 +30,6 @@ def new # GET /sources/1/edit def edit - puts '------------------' - puts 'b' - @source.source_filters.all.each { |sf| puts sf.filter_value } - puts '------------------' authorize @source end @@ -79,9 +75,6 @@ def check_exists def update authorize @source - puts '------------ HI FROM CREATE ------------' - puts source_params - respond_to do |format| if @source.update(source_params) @source.create_activity(:update, owner: current_user) if @source.log_update_activity? diff --git a/app/views/sources/_form.html.erb b/app/views/sources/_form.html.erb index ec6f39943..b2582a05c 100644 --- a/app/views/sources/_form.html.erb +++ b/app/views/sources/_form.html.erb @@ -1,5 +1,4 @@ -<%#<%= simple_form_for (@content_provider ? [@content_provider, @source] : @source), html: { class: 'source', multipart: true } do |f| %> -<%= simple_form_for (@source), html: { class: 'source', multipart: true } do |f| %> +<%= simple_form_for (@content_provider ? [@content_provider, @source] : @source), html: { class: 'source', multipart: true } do |f| %> <%= render partial: 'common/error_summary', locals: { resource: @source } %> <% unless @content_provider %> @@ -41,48 +40,14 @@

Filters

- <% f.object.source_filters.load %> - <%= 'hi' %> - <%= f.object.class %> - <%= f.object.persisted? %> - <%= f.object.nested_attributes_options %> - <%= f.object.association(:source_filters).reader.class %> - <%= f.object.source_filters.loaded? %>
- <%= f.object.source_filters.any? %>
- <%= f.object.source_filters.map(&:marked_for_destruction?) %>
- <%= @source.object_id %>
- <%= f.object.object_id %>
- - - <% f.object.source_filters.each_with_index do |sf, index| %> - <%= fields_for "source[source_filters_attributes][#{index}]", sf do |ff| %> - a source exists - <%= ff.text_field :filter_value %> - <% end %> - <% end %> - -
- <% f.simple_fields_for :source_filters do |ff| %> - a source exists - <%= render partial: 'source_filter_form', locals: { f: ff } %> - <% end %> -
-
- <% f.fields_for :source_filters, @source.source_filters do |ff| %> - a source exists - <%= render partial: 'source_filter_form', locals: { f: ff } %> - <% end %> -
- - <% f.object.source_filters.each do |filter| %> -
+ <% f.object.source_filters.each do |filter| %> <%= f.simple_fields_for :source_filters, filter, child_index: filter.id do |ff| %> <%= render partial: 'source_filter_form', locals: { f: ff } %> <% end %> -
- <% end %> + <% end %> +
@@ -96,10 +61,10 @@ sources_path, class: 'btn btn-default' %>
-