From b9259d3bdf951023f0cd42cbce62492f700a76df Mon Sep 17 00:00:00 2001 From: codycooperross <50597551+codycooperross@users.noreply.github.com> Date: Fri, 27 Feb 2026 13:29:25 +0100 Subject: [PATCH 1/4] Test implementation of reindexing queueing for touched DOIs --- app/constants/relation_types.rb | 4 ++++ app/models/event.rb | 20 ++++++++++++++++++++ app/utilities/sqs_utilities.rb | 5 +++++ lib/tasks/event.rake | 18 ++++++++++++++++++ 4 files changed, 47 insertions(+) diff --git a/app/constants/relation_types.rb b/app/constants/relation_types.rb index 422c523..c547043 100644 --- a/app/constants/relation_types.rb +++ b/app/constants/relation_types.rb @@ -68,4 +68,8 @@ module RelationTypes "datacite-crossref", "crossref", ].freeze + + # relation_type_ids that affect counts in lupo. + SOURCE_RELATION_TYPES = %w[references parts versions].freeze + TARGET_RELATION_TYPES = %w[views downloads citations part_of version_of].freeze end diff --git a/app/models/event.rb b/app/models/event.rb index f65cece..68f870b 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -139,4 +139,24 @@ def obj_hash indexes :citation_year, type: :integer indexes :cache_key, type: :keyword end + + def self.reindex_touched_dois(start_date:, end_date:, threads: 20) + total = 0 + + start_date.to_date.upto(end_date.to_date) do |date| + dois = Set.new + + where(updated_at: date.all_day).where(source_relation_type_id: RelationTypes::SOURCE_RELATION_TYPES).distinct.pluck(:source_doi).each { |doi| dois << doi } + where(updated_at: date.all_day).where(target_relation_type_id: RelationTypes::TARGET_RELATION_TYPES).distinct.pluck(:target_doi).each { |doi| dois << doi } + + # Test performance before enabling SQS queues. + # Parallel.each(dois.to_a, in_threads: threads) do |doi| + # SqsUtilities.send_events_doi_index_message({ doi: doi }) + # end + + total += dois.size + end + + total + end end diff --git a/app/utilities/sqs_utilities.rb b/app/utilities/sqs_utilities.rb index ddfe12b..f9e4525 100644 --- a/app/utilities/sqs_utilities.rb +++ b/app/utilities/sqs_utilities.rb @@ -6,6 +6,11 @@ def send_events_other_doi_job_message(data) send_message(data, shoryuken_class: "OtherDoiJob", queue_name: "events_other_doi_job") end + # ReindexDoiJob not implemented yet. + # def send_events_doi_index_message(data) + # send_message(data, shoryuken_class: "ReindexDoiJob", queue_name: "lupo_background") + # end + private def send_message(body, options = {}) diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index 8267c0f..76c5f34 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -34,4 +34,22 @@ namespace :event do puts("Rake task has completed!") end + + desc "Re-queue SQS re-index messages for unique DOIs in events updated within a date range" + # Dates are inclusive. END_DATE defaults to START_DATE for single-day runs. + task reindex_touched_dois: :environment do + raise "START_DATE is required" if ENV["START_DATE"].blank? + + start_date = Date.parse(ENV["START_DATE"]) + end_date = Date.parse(ENV["END_DATE"].presence || ENV["START_DATE"]) + + raise "END_DATE must be on or after START_DATE" if end_date < start_date + + count = Event.reindex_touched_dois( + start_date: start_date, + end_date: end_date + ) + + puts "Sent #{count} unique DOIs for re-indexing." + end end From 0c8913af376ee541e31e2d9fba0ece497c36f01f Mon Sep 17 00:00:00 2001 From: codycooperross <50597551+codycooperross@users.noreply.github.com> Date: Fri, 27 Feb 2026 13:43:11 +0100 Subject: [PATCH 2/4] Linting --- app/constants/relation_types.rb | 4 ++-- app/models/event.rb | 42 ++++++++++++++++++++------------- lib/tasks/event.rake | 2 +- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/app/constants/relation_types.rb b/app/constants/relation_types.rb index c547043..8ea85bd 100644 --- a/app/constants/relation_types.rb +++ b/app/constants/relation_types.rb @@ -70,6 +70,6 @@ module RelationTypes ].freeze # relation_type_ids that affect counts in lupo. - SOURCE_RELATION_TYPES = %w[references parts versions].freeze - TARGET_RELATION_TYPES = %w[views downloads citations part_of version_of].freeze + SOURCE_RELATION_TYPES = ["references", "parts", "versions"].freeze + TARGET_RELATION_TYPES = ["views", "downloads", "citations", "part_of", "version_of"].freeze end diff --git a/app/models/event.rb b/app/models/event.rb index 68f870b..7ccd29a 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -140,23 +140,31 @@ def obj_hash indexes :cache_key, type: :keyword end - def self.reindex_touched_dois(start_date:, end_date:, threads: 20) - total = 0 - - start_date.to_date.upto(end_date.to_date) do |date| - dois = Set.new - - where(updated_at: date.all_day).where(source_relation_type_id: RelationTypes::SOURCE_RELATION_TYPES).distinct.pluck(:source_doi).each { |doi| dois << doi } - where(updated_at: date.all_day).where(target_relation_type_id: RelationTypes::TARGET_RELATION_TYPES).distinct.pluck(:target_doi).each { |doi| dois << doi } - - # Test performance before enabling SQS queues. - # Parallel.each(dois.to_a, in_threads: threads) do |doi| - # SqsUtilities.send_events_doi_index_message({ doi: doi }) - # end - - total += dois.size + class << self + def reindex_touched_dois(start_date:, end_date:, threads: 20) + total = 0 + + start_date.to_date.upto(end_date.to_date) do |date| + dois = Set.new + + where(updated_at: date.all_day) + .where(source_relation_type_id: RelationTypes::SOURCE_RELATION_TYPES) + .distinct.pluck(:source_doi) + .each { |doi| dois << doi } + where(updated_at: date.all_day) + .where(target_relation_type_id: RelationTypes::TARGET_RELATION_TYPES) + .distinct.pluck(:target_doi) + .each { |doi| dois << doi } + + # Test performance before enabling SQS queues. + # Parallel.each(dois.to_a, in_threads: threads) do |doi| + # SqsUtilities.send_events_doi_index_message({ doi: doi }) + # end + + total += dois.size + end + + total end - - total end end diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index 76c5f34..04b1cd2 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -47,7 +47,7 @@ namespace :event do count = Event.reindex_touched_dois( start_date: start_date, - end_date: end_date + end_date: end_date, ) puts "Sent #{count} unique DOIs for re-indexing." From 809782060bf1e9fec9b28c44daa35567e74eb88b Mon Sep 17 00:00:00 2001 From: codycooperross <50597551+codycooperross@users.noreply.github.com> Date: Fri, 27 Feb 2026 14:00:34 +0100 Subject: [PATCH 3/4] Change threads to 10 --- app/models/event.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/event.rb b/app/models/event.rb index 7ccd29a..388c829 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -141,7 +141,7 @@ def obj_hash end class << self - def reindex_touched_dois(start_date:, end_date:, threads: 20) + def reindex_touched_dois(start_date:, end_date:, threads: 10) total = 0 start_date.to_date.upto(end_date.to_date) do |date| From 94a554392d098847711b130ac35eee78d6067bff Mon Sep 17 00:00:00 2001 From: codycooperross <50597551+codycooperross@users.noreply.github.com> Date: Thu, 5 Mar 2026 15:54:58 -0500 Subject: [PATCH 4/4] Compatibility with lupo job --- app/models/event.rb | 2 +- app/utilities/sqs_utilities.rb | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index 388c829..0b06527 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -158,7 +158,7 @@ def reindex_touched_dois(start_date:, end_date:, threads: 10) # Test performance before enabling SQS queues. # Parallel.each(dois.to_a, in_threads: threads) do |doi| - # SqsUtilities.send_events_doi_index_message({ doi: doi }) + # SqsUtilities.send_events_doi_index_message(doi) # end total += dois.size diff --git a/app/utilities/sqs_utilities.rb b/app/utilities/sqs_utilities.rb index f9e4525..aebfe83 100644 --- a/app/utilities/sqs_utilities.rb +++ b/app/utilities/sqs_utilities.rb @@ -6,10 +6,9 @@ def send_events_other_doi_job_message(data) send_message(data, shoryuken_class: "OtherDoiJob", queue_name: "events_other_doi_job") end - # ReindexDoiJob not implemented yet. - # def send_events_doi_index_message(data) - # send_message(data, shoryuken_class: "ReindexDoiJob", queue_name: "lupo_background") - # end + def send_events_doi_index_message(data) + send_message(data, shoryuken_class: "ReindexByDoiJob", queue_name: "lupo_background") + end private