From baf24942be114828cf13c9aaa1919bcc4b145bf1 Mon Sep 17 00:00:00 2001
From: Wendel Fabian Chinsamy
Date: Wed, 26 Nov 2025 13:11:41 +0200
Subject: [PATCH 1/2] Add batch processing logic to the crossref rake task

---
 lib/tasks/event.rake | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake
index 65aca0b..db9f2b8 100644
--- a/lib/tasks/event.rake
+++ b/lib/tasks/event.rake
@@ -15,14 +15,19 @@ namespace :event do
     events = Event
       .where(source_id: ["crossref", "datacite-crossref"])
       .where(created_at: start_date...end_date)
+      .order(:id)
 
     puts("Number of events: #{events.count}")
 
-    Parallel.each(events, in_threads: 20) do |event|
-      SqsUtilities.send_events_other_doi_job_message({
-        subj_id: event.subj_id,
-        obj_id: event.obj_id,
-      })
+    events.in_batches(of: 10_000) do |batch|
+      batch_events = batch.select(:id, :subj_id, :obj_id).to_a
+
+      Parallel.each(batch_events, in_threads: 20) do |batch_event|
+        SqsUtilities.send_events_other_doi_job_message({
+          subj_id: batch_event.subj_id,
+          obj_id: batch_event.obj_id,
+        })
+      end
     end
   end
 end

From 6aae5c24ff582fe30ed42573d36184197462de50 Mon Sep 17 00:00:00 2001
From: Wendel Fabian Chinsamy
Date: Wed, 26 Nov 2025 13:15:37 +0200
Subject: [PATCH 2/2] Add some print statements to make following the task flow easier

---
 lib/tasks/event.rake | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake
index db9f2b8..5ab7ff5 100644
--- a/lib/tasks/event.rake
+++ b/lib/tasks/event.rake
@@ -19,7 +19,11 @@ namespace :event do
 
     puts("Number of events: #{events.count}")
 
+    batch_count = 0
+
     events.in_batches(of: 10_000) do |batch|
+      batch_count += 1
+      puts("Processing batch: #{batch_count}")
       batch_events = batch.select(:id, :subj_id, :obj_id).to_a
 
       Parallel.each(batch_events, in_threads: 20) do |batch_event|
@@ -29,5 +33,7 @@ namespace :event do
         })
       end
     end
+
+    puts("Rake task has completed!")
   end
 end