diff --git a/app/constants/relation_types.rb b/app/constants/relation_types.rb
index 422c523..8ea85bd 100644
--- a/app/constants/relation_types.rb
+++ b/app/constants/relation_types.rb
@@ -68,4 +68,8 @@ module RelationTypes
     "datacite-crossref",
     "crossref",
   ].freeze
+
+  # relation_type_ids that affect counts in lupo.
+  SOURCE_RELATION_TYPES = ["references", "parts", "versions"].freeze
+  TARGET_RELATION_TYPES = ["views", "downloads", "citations", "part_of", "version_of"].freeze
 end
diff --git a/app/models/event.rb b/app/models/event.rb
index f65cece..0b06527 100644
--- a/app/models/event.rb
+++ b/app/models/event.rb
@@ -139,4 +139,52 @@ def obj_hash
     indexes :citation_year, type: :integer
     indexes :cache_key, type: :keyword
   end
+
+  class << self
+    # Collects the unique DOIs of events updated within a date range so they
+    # can be re-indexed.
+    #
+    # The range is walked one day at a time, both ends inclusive. For each day
+    # the source DOIs of events whose source_relation_type_id is listed in
+    # RelationTypes::SOURCE_RELATION_TYPES and the target DOIs of events whose
+    # target_relation_type_id is listed in RelationTypes::TARGET_RELATION_TYPES
+    # are gathered into one set.
+    #
+    # NOTE: the SQS enqueue step is still commented out, so for now this only
+    # counts. DOIs are de-duplicated per day, not across the whole range.
+    #
+    # @param start_date [#to_date] first day of the range
+    # @param end_date [#to_date] last day of the range
+    # @param threads [Integer] thread count for the disabled enqueue step (unused for now)
+    # @return [Integer] sum of the per-day unique DOI counts
+    def reindex_touched_dois(start_date:, end_date:, threads: 10)
+      total = 0
+
+      start_date.to_date.upto(end_date.to_date) do |date|
+        dois = Set.new
+
+        dois.merge(
+          where(updated_at: date.all_day)
+            .where(source_relation_type_id: RelationTypes::SOURCE_RELATION_TYPES)
+            .where.not(source_doi: nil)
+            .distinct.pluck(:source_doi),
+        )
+        dois.merge(
+          where(updated_at: date.all_day)
+            .where(target_relation_type_id: RelationTypes::TARGET_RELATION_TYPES)
+            .where.not(target_doi: nil)
+            .distinct.pluck(:target_doi),
+        )
+
+        # Test performance before enabling SQS queues.
+        # Parallel.each(dois.to_a, in_threads: threads) do |doi|
+        #   SqsUtilities.send_events_doi_index_message(doi)
+        # end
+
+        total += dois.size
+      end
+
+      total
+    end
+  end
 end
diff --git a/app/utilities/sqs_utilities.rb b/app/utilities/sqs_utilities.rb
index ddfe12b..aebfe83 100644
--- a/app/utilities/sqs_utilities.rb
+++ b/app/utilities/sqs_utilities.rb
@@ -6,6 +6,10 @@ def send_events_other_doi_job_message(data)
     send_message(data, shoryuken_class: "OtherDoiJob", queue_name: "events_other_doi_job")
   end
 
+  def send_events_doi_index_message(data)
+    send_message(data, shoryuken_class: "ReindexByDoiJob", queue_name: "lupo_background")
+  end
+
   private
 
   def send_message(body, options = {})
diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake
index 8267c0f..04b1cd2 100644
--- a/lib/tasks/event.rake
+++ b/lib/tasks/event.rake
@@ -34,4 +34,24 @@ namespace :event do
 
     puts("Rake task has completed!")
   end
+
+  desc "Re-queue SQS re-index messages for unique DOIs in events updated within a date range"
+  # Dates are inclusive. END_DATE defaults to START_DATE for single-day runs.
+  task reindex_touched_dois: :environment do
+    raise "START_DATE is required" if ENV["START_DATE"].blank?
+
+    start_date = Date.parse(ENV["START_DATE"])
+    end_date = Date.parse(ENV["END_DATE"].presence || ENV["START_DATE"])
+
+    raise "END_DATE must be on or after START_DATE" if end_date < start_date
+
+    count = Event.reindex_touched_dois(
+      start_date: start_date,
+      end_date: end_date,
+    )
+
+    # Event.reindex_touched_dois does not enqueue anything yet, so report a
+    # count of DOIs found rather than messages sent.
+    puts "Found #{count} unique DOIs for re-indexing (SQS sending is not enabled yet)."
+  end
 end