Skip to content

Commit 41f1dad

Browse files
committed
Revert parallel code generation (incomplete prerequisite handling)
Removes LIBINT2_NUM_WORKERS, WorkerConfig, build_libint_parallel.sh, and all worker-partitioning logic. Process-level parallelism produced incomplete output (missing CR header files): generate_rr_code needs the external symbols from ALL quartets, but each worker only discovers the symbols for its own subset of quartets. Retained: type dispatch cache, CSE disable, braket tiebreaker, progress bar.
1 parent e696004 commit 41f1dad

4 files changed

Lines changed: 25 additions & 197 deletions

File tree

CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,6 @@ option_with_print(LIBINT2_SINGLE_EVALTYPE
270270
"Generate single evaluator type (i.e. all tasks use the same evaluator). OFF is NYI" ON)
271271
option_with_default(LIBINT2_ENABLE_UNROLLING
272272
"Unroll shell sets into integrals (will unroll shell sets larger than N) (0 for never, N for N, 1000000000 for always)" 100)
273-
option_with_default(LIBINT2_NUM_WORKERS
274-
"Number of parallel build_libint processes for code generation (1 for serial)" 1)
275273
option_with_default(LIBINT2_ALIGN_SIZE
276274
"(EXPERT) if posix_memalign is available, this will specify alignment of Libint data, in units of
277275
sizeof(LIBINT2_REALTYPE). Default is to use built-in heuristics: system-determined for vectorization off (default) or veclen * sizeof(LIBINT2_REALTYPE) for vectorization on." 0)

bin/build_libint_parallel.sh

Lines changed: 0 additions & 54 deletions
This file was deleted.

src/bin/libint/build_libint.cc

Lines changed: 14 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232

3333
#include <boost/preprocessor.hpp>
3434
#include <chrono>
35-
#include <cstdlib>
3635
#include <fstream>
3736
#include <iomanip>
3837
#include <iostream>
@@ -71,55 +70,6 @@ using namespace libint2;
7170

7271
CodeGenProgress g_progress;
7372

74-
/// Process-level parallelism: each worker generates code for a subset of
75-
/// quartets. Configured via LIBINT_NUM_WORKERS and LIBINT_WORKER_ID env vars.
76-
struct WorkerConfig {
77-
unsigned int num_workers = 1;
78-
unsigned int worker_id = 0;
79-
std::unique_ptr<std::ofstream> si_frag; // static_init fragment
80-
std::unique_ptr<std::ofstream> ii_frag; // int_iface fragment
81-
82-
void init(const std::string& source_dir = "") {
83-
const char* nw = std::getenv("LIBINT_NUM_WORKERS");
84-
const char* wi = std::getenv("LIBINT_WORKER_ID");
85-
if (nw) num_workers = std::max(1u, static_cast<unsigned>(std::atoi(nw)));
86-
if (wi) worker_id = static_cast<unsigned>(std::atoi(wi));
87-
if (worker_id >= num_workers) worker_id = 0;
88-
}
89-
90-
void open_fragments(const std::string& source_dir) {
91-
if (!is_parallel()) return;
92-
si_frag.reset(new std::ofstream(source_dir + "libint2_static_init_" +
93-
std::to_string(worker_id) + ".inc"));
94-
ii_frag.reset(new std::ofstream(source_dir + "libint2_iface_internal_" +
95-
std::to_string(worker_id) + ".inc"));
96-
}
97-
98-
/// Write to static_init: either iface or fragment file
99-
void write_static_init(Libint2Iface* iface, const std::string& s) {
100-
if (si_frag)
101-
*si_frag << s << std::endl;
102-
else
103-
iface->to_static_init(s);
104-
}
105-
106-
/// Write to int_iface: either iface or fragment file
107-
void write_int_iface(Libint2Iface* iface, const std::string& s) {
108-
if (ii_frag)
109-
*ii_frag << s << std::endl;
110-
else
111-
iface->to_int_iface(s);
112-
}
113-
114-
/// Returns true if this worker should process the given global quartet index
115-
bool should_process(unsigned int global_idx) const {
116-
return (global_idx % num_workers) == worker_id;
117-
}
118-
119-
bool is_parallel() const { return num_workers > 1; }
120-
};
121-
static WorkerConfig g_worker;
122-
12373
enum ShellSetType {
12474
ShellSetType_Standard = LIBINT_SHELL_SET_STANDARD,
12575
ShellSetType_ORCA = LIBINT_SHELL_SET_ORCA
@@ -597,14 +547,14 @@ void build_onebody_1b_1k(std::ostream& os, std::string label,
597547
<< la << "][" << lb
598548
<< "] = " << context->label_to_function_name(eval_label)
599549
<< context->end_of_stat() << endl;
600-
g_worker.write_static_init(iface.get(), oss.str());
550+
iface->to_static_init(oss.str());
601551

602552
// need to declare this function internally
603553
for (std::deque<std::string>::const_iterator i = decl_filenames.begin();
604554
i != decl_filenames.end(); ++i) {
605555
oss.str("");
606556
oss << "#include <" << *i << ">" << endl;
607-
g_worker.write_int_iface(iface.get(), oss.str());
557+
iface->to_int_iface(oss.str());
608558
}
609559

610560
#if DEBUG
@@ -621,12 +571,6 @@ void build_onebody_1b_1k(std::ostream& os, std::string label,
621571
void try_main(int argc, char* argv[]) {
622572
std::ostream& os = cout;
623573

624-
g_worker.init();
625-
if (g_worker.is_parallel()) {
626-
std::cerr << " Worker " << g_worker.worker_id << " of "
627-
<< g_worker.num_workers << std::endl;
628-
}
629-
630574
// First must declare the tasks
631575
LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
632576
taskmgr.add("default");
@@ -1025,7 +969,6 @@ void try_main(int argc, char* argv[]) {
1025969
cparams->print(os);
1026970

1027971
g_progress.start();
1028-
g_worker.open_fragments(cparams->source_directory());
1029972

1030973
#ifdef LIBINT_INCLUDE_ONEBODY
1031974
for (unsigned int d = 0; d <= LIBINT_INCLUDE_ONEBODY; ++d) {
@@ -1087,41 +1030,6 @@ void try_main(int argc, char* argv[]) {
10871030

10881031
g_progress.finish();
10891032

1090-
// Close fragment files if in parallel mode
1091-
g_worker.si_frag.reset();
1092-
g_worker.ii_frag.reset();
1093-
1094-
// In parallel mode, non-zero workers exit here after generating source
1095-
// files and iface fragments. Worker 0 continues to merge and finalize.
1096-
if (g_worker.is_parallel() && g_worker.worker_id != 0) {
1097-
os << "Worker " << g_worker.worker_id << " finished." << endl;
1098-
return;
1099-
}
1100-
1101-
// Merge worker fragment files into the iface (worker 0 only)
1102-
if (g_worker.is_parallel()) {
1103-
for (unsigned int w = 0; w < g_worker.num_workers; ++w) {
1104-
auto read_and_append =
1105-
[&](const std::string& frag_name,
1106-
void (Libint2Iface::*writer)(const std::string&)) {
1107-
std::string fpath = cparams->source_directory() + frag_name;
1108-
std::ifstream fin(fpath);
1109-
if (fin.is_open()) {
1110-
std::string line;
1111-
while (std::getline(fin, line)) {
1112-
if (!line.empty()) (iface.get()->*writer)(line);
1113-
}
1114-
fin.close();
1115-
std::remove(fpath.c_str());
1116-
}
1117-
};
1118-
read_and_append("libint2_static_init_" + std::to_string(w) + ".inc",
1119-
&Libint2Iface::to_static_init);
1120-
read_and_append("libint2_iface_internal_" + std::to_string(w) + ".inc",
1121-
&Libint2Iface::to_int_iface);
1122-
}
1123-
}
1124-
11251033
// Generate code for the set-level RRs
11261034
std::deque<std::string> decl_filenames, def_filenames;
11271035
generate_rr_code(os, cparams, decl_filenames, def_filenames);
@@ -1245,7 +1153,6 @@ static void build_TwoPRep_2b_2k(
12451153

12461154
// Note: la, lb, lc, ld generate code for chemist notation (ab|O|cd), where O
12471155
// is a two-body operator.
1248-
unsigned int quartet_idx = 0;
12491156
for (unsigned int la = 0; la <= lmax; la++) {
12501157
for (unsigned int lb = 0; lb <= lmax; lb++) {
12511158
for (unsigned int lc = 0; lc <= lmax; lc++) {
@@ -1255,9 +1162,6 @@ static void build_TwoPRep_2b_2k(
12551162
bra_ket_coswappable))
12561163
continue;
12571164

1258-
// Worker filter: skip quartets not assigned to this worker
1259-
if (!g_worker.should_process(quartet_idx++)) continue;
1260-
12611165
// std::shared_ptr<Tactic> tactic(new ParticleDirectionTactic(la+lb >
12621166
// lc+ld ? false : true));
12631167
std::shared_ptr<Tactic> tactic(
@@ -1424,13 +1328,13 @@ static void build_TwoPRep_2b_2k(
14241328
<< la << "][" << lb << "][" << lc << "][" << ld
14251329
<< "] = " << context->label_to_function_name(eval_label)
14261330
<< context->end_of_stat() << endl;
1427-
g_worker.write_static_init(iface.get(), oss.str());
1331+
iface->to_static_init(oss.str());
14281332

14291333
// need to declare this function internally
14301334
for (auto& decl_filename : decl_filenames) {
14311335
oss.str("");
14321336
oss << "#include <" << decl_filename << ">" << endl;
1433-
g_worker.write_int_iface(iface.get(), oss.str());
1337+
iface->to_int_iface(oss.str());
14341338
}
14351339

14361340
#if DEBUG
@@ -1634,13 +1538,13 @@ void build_TwoPRep_1b_2k(std::ostream& os,
16341538
<< lbra << "][" << lc << "][" << ld
16351539
<< "] = " << context->label_to_function_name(label)
16361540
<< context->end_of_stat() << endl;
1637-
g_worker.write_static_init(iface.get(), oss.str());
1541+
iface->to_static_init(oss.str());
16381542

16391543
// need to declare this function internally
16401544
for (auto& decl_filename : decl_filenames) {
16411545
oss.str("");
16421546
oss << "#include <" << decl_filename << ">" << endl;
1643-
g_worker.write_int_iface(iface.get(), oss.str());
1547+
iface->to_int_iface(oss.str());
16441548
}
16451549

16461550
#if DEBUG
@@ -1833,13 +1737,13 @@ void build_TwoPRep_1b_1k(std::ostream& os,
18331737
<< lbra << "][" << lket
18341738
<< "] = " << context->label_to_function_name(label)
18351739
<< context->end_of_stat() << endl;
1836-
g_worker.write_static_init(iface.get(), oss.str());
1740+
iface->to_static_init(oss.str());
18371741

18381742
// need to declare this function internally
18391743
for (auto& decl_filename : decl_filenames) {
18401744
oss.str("");
18411745
oss << "#include <" << decl_filename << ">" << endl;
1842-
g_worker.write_int_iface(iface.get(), oss.str());
1746+
iface->to_int_iface(oss.str());
18431747
}
18441748

18451749
#if DEBUG
@@ -2031,15 +1935,15 @@ void build_R12kG12_2b_2k(std::ostream& os,
20311935
<< la << "][" << lb << "][" << lc << "][" << ld
20321936
<< "] = " << context->label_to_function_name(label)
20331937
<< context->end_of_stat() << endl;
2034-
g_worker.write_static_init(iface.get(), oss.str());
1938+
iface->to_static_init(oss.str());
20351939

20361940
// need to declare this function internally
20371941
for (std::deque<std::string>::const_iterator i =
20381942
decl_filenames.begin();
20391943
i != decl_filenames.end(); ++i) {
20401944
oss.str("");
20411945
oss << "#include <" << *i << ">" << endl;
2042-
g_worker.write_int_iface(iface.get(), oss.str());
1946+
iface->to_int_iface(oss.str());
20431947
}
20441948

20451949
#if DEBUG
@@ -2176,15 +2080,15 @@ void build_R12kG12_2b_2k_separate(
21762080
<< "[" << la << "][" << lb << "][" << lc << "][" << ld
21772081
<< "] = " << context->label_to_function_name(label)
21782082
<< context->end_of_stat() << endl;
2179-
g_worker.write_static_init(iface.get(), oss.str());
2083+
iface->to_static_init(oss.str());
21802084

21812085
// need to declare this function internally
21822086
for (std::deque<std::string>::const_iterator i =
21832087
decl_filenames.begin();
21842088
i != decl_filenames.end(); ++i) {
21852089
oss.str("");
21862090
oss << "#include <" << *i << ">" << endl;
2187-
g_worker.write_int_iface(iface.get(), oss.str());
2091+
iface->to_int_iface(oss.str());
21882092
}
21892093

21902094
#if DEBUG
@@ -2362,11 +2266,11 @@ void build_G12DKH_2b_2k(std::ostream& os,
23622266
<< la << "][" << lb << "][" << lc << "][" << ld
23632267
<< "] = " << context->label_to_function_name(label)
23642268
<< context->end_of_stat() << endl;
2365-
g_worker.write_static_init(iface.get(), oss.str());
2269+
iface->to_static_init(oss.str());
23662270

23672271
oss.str("");
23682272
oss << "#include <" << decl_filename << ">" << endl;
2369-
g_worker.write_int_iface(iface.get(), oss.str());
2273+
iface->to_int_iface(oss.str());
23702274

23712275
// For the most expensive (i.e. presumably complete) graph extract
23722276
// all precomputed quantities -- these will be members of the

src/lib/libint/CMakeLists.txt

Lines changed: 11 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,37 +6,17 @@ include(int_computed) # for macros.tex and features
66
# <<< Generate Library Source >>>
77

88
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/generated")
9-
if (LIBINT2_NUM_WORKERS GREATER 1)
10-
# Parallel code generation requires bash for background process management.
11-
# The script launches N-1 workers in parallel, then worker 0 merges results.
12-
find_program(BASH_EXECUTABLE bash REQUIRED)
13-
add_custom_command(
14-
OUTPUT
15-
"${PROJECT_BINARY_DIR}/generated/libint2_params.h"
16-
"${PROJECT_BINARY_DIR}/generated/HRRPart0bra0ket0pp.h"
17-
"${PROJECT_BINARY_DIR}/generated/HRRPart0bra0ket0pp.cc"
18-
COMMAND ${BASH_EXECUTABLE}
19-
"${PROJECT_SOURCE_DIR}/bin/build_libint_parallel.sh"
20-
$<TARGET_FILE:build_libint> ${LIBINT2_NUM_WORKERS}
21-
DEPENDS
22-
build_libint
23-
"${PROJECT_SOURCE_DIR}/bin/build_libint_parallel.sh"
24-
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/generated"
25-
COMMENT "Generating Libint2 library source (${LIBINT2_NUM_WORKERS} workers)"
26-
)
27-
else()
28-
add_custom_command(
29-
OUTPUT
30-
"${PROJECT_BINARY_DIR}/generated/libint2_params.h"
31-
"${PROJECT_BINARY_DIR}/generated/HRRPart0bra0ket0pp.h"
32-
"${PROJECT_BINARY_DIR}/generated/HRRPart0bra0ket0pp.cc"
33-
COMMAND $<TARGET_FILE:build_libint>
34-
DEPENDS
35-
build_libint
36-
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/generated"
37-
COMMENT "Generating Libint2 library source"
38-
)
39-
endif()
9+
add_custom_command(
10+
OUTPUT
11+
"${PROJECT_BINARY_DIR}/generated/libint2_params.h"
12+
"${PROJECT_BINARY_DIR}/generated/HRRPart0bra0ket0pp.h"
13+
"${PROJECT_BINARY_DIR}/generated/HRRPart0bra0ket0pp.cc"
14+
COMMAND $<TARGET_FILE:build_libint>
15+
DEPENDS
16+
build_libint
17+
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/generated"
18+
COMMENT "Generating Libint2 library source"
19+
)
4020
add_custom_target(libint-library-generate DEPENDS "${PROJECT_BINARY_DIR}/generated/libint2_params.h")
4121

4222

0 commit comments

Comments
 (0)