From a79b4fd719f6033076546ba3720472c8b8d885b3 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Fri, 12 Jun 2026 23:36:54 +0800 Subject: [PATCH 01/15] Add initial hash table search step program --- hash_table_search_step.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 hash_table_search_step.cpp diff --git a/hash_table_search_step.cpp b/hash_table_search_step.cpp new file mode 100644 index 0000000..7f9c1df --- /dev/null +++ b/hash_table_search_step.cpp @@ -0,0 +1,17 @@ +// # ********************************************************* +// Program: hash_table_search_step.cpp +// Course: CCP6214 Algorithm Design and Analysis +// Lecture Class: TC2L +// Tutorial Class: TT5L +// Trimester: 2610 +// Member_1: Hew Wee Bo | hewweebo@gmail.com | 0128803121 +// Member_2: ID | NAME | EMAIL | PHONE +// Member_3: ID | JEVAANRAJ A/L RAJA KUMARAN | jevaanraj17@gmail.com | 0179651973 +// Member_4: ID | NAME | EMAIL | PHONE +// # ********************************************************* +// Task Distribution +// Member_1: Hew Wee Bo +// Member_2: +// Member_3: Jevaanraj +// Member_4: +// # ********************************************************* From b12cb2f9a17ea9d425dcf470fe2fff7716f6e3cc Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Fri, 12 Jun 2026 23:53:40 +0800 Subject: [PATCH 02/15] Refactor hash table implementation and enhance code structure --- hash_table_search_step.cpp | 44 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/hash_table_search_step.cpp b/hash_table_search_step.cpp index 7f9c1df..3533cd2 100644 --- a/hash_table_search_step.cpp +++ b/hash_table_search_step.cpp @@ -15,3 +15,47 @@ // Member_3: Jevaanraj // Member_4: // # ********************************************************* + +/* Purpose: + Reads a dataset CSv, inserts all records into a hash table + using separate chaining (linked list), then searches for + a specified target key and logs every step of the search process + to an output file */ + +#include +#include +#include +#include +#include +using namespace std; + + +struct Record { + long long key; // 10-digit unique int + string value; // 5-letter lowercase string +}; + +struct Node { + Record data; + Node* next; +}; + +class HashTable { +private: + int tableSize; + vector table; + +public: + HashTable(int size) : tableSize(size), table(size, nullptr) {} + ~HashTable() { + for (int i = 0; i < tableSize; ++i) { + Node* current = table[i]; + while (current) { + Node* temp = current; + current = current->next; + delete temp; + } + } + } + + \ No newline at end of file From e2586e26956a720e4585ea2bed4a82eccc49a6d3 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Sat, 13 Jun 2026 00:10:05 +0800 Subject: [PATCH 03/15] Implement hash function and enhance searchWithSteps method for detailed logging --- hash_table_search_step.cpp | 46 +++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/hash_table_search_step.cpp b/hash_table_search_step.cpp index 3533cd2..5f0b7db 100644 --- a/hash_table_search_step.cpp +++ b/hash_table_search_step.cpp @@ -58,4 +58,48 @@ class HashTable { } } - \ No newline at end of file + int hashFunction(long long key) const{ + return (int)((unsigned long long)key% (unsigned long long)tableSize); + } + + void insert(const Record& rec) { + int idx = hashFunction(rec.key); + Node* newNode = new Node(); + newNode->data = rec; + newNode->next = table[idx]; + table[idx] = newNode; + } + + bool searchWithSteps(long long targetKey, ofstream& out) const { + int idx = hashFunction(targetKey); + + out << "Searching for target: " << targetKey << "\n"; + out << "Hash bucket index : " << idx << "\n"; + out << "-------------------------------------------\n"; + + Node* curr = table[idx]; + int compareCount = 0; + + while (curr != nullptr) { + compareCount++; + out << "Comparison " << compareCount + << ": comparing with " << curr->data.key + << "/" << curr->data.value; + + if (curr->data.key == targetKey) { + out << " --> MATCH\n"; + out << "-------------------------------------------\n"; + out << targetKey << " = " + << curr->data.key << "/" << curr->data.value << "\n"; + return true; + } else { + out << " (no match)\n"; + } + curr = curr->next; + } + out << "-------------------------------------------\n"; + out << "-1 != " << targetKey << "\n"; + return false; + } +}; + From ee20131eacd40db792f9b5e39c1be7d993be8d58 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Sat, 13 Jun 2026 00:38:13 +0800 Subject: [PATCH 04/15] Add CSV parsing and output file handling for hash table search --- hash_table_search_step.cpp | 68 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/hash_table_search_step.cpp b/hash_table_search_step.cpp index 5f0b7db..6265204 100644 --- a/hash_table_search_step.cpp +++ b/hash_table_search_step.cpp @@ -103,3 +103,71 @@ class HashTable { } }; +vector parseCSV(const string& filename) { + vector records; + ifstream inFile(filename); + + if (!inFile.is_open()) { + cerr << "Error opening file: " << filename << endl; + return records; +} + +string line; +while (getline(inFile, line)) { + if (line.empty()) continue; + + if (!line.empty() && line.back() == '\r') { + line.pop_back(); + } + + stringstream ss(line); + string keyStr, valueStr; + + if (getline(ss, keyStr, ',') && getline(ss, valueStr)) { + try { + Record rec; + rec.key = stoll(keyStr); + rec.value = valueStr; + records.push_back(rec); + }catch (...){ + cerr << "Error parsing line: " << line << endl; + + } + } + } + + inFile.close(); + return records; +} + +string extractDatasetSize(const string& filename) { + size_t underPos = filename.rfind('_'); + size_t dotPos = filename.rfind('.'); + if (underPos != string::npos && dotPos != string::npos && underPos < dotPos) { + return filename.substr(underPos + 1, dotPos - underPos - 1); + } + return "unknown"; +} + +void runSearch(const HashTable& ht, + long long targetKey, + const string& datasetSizeStr) { + string outFilename = "dataset_" + datasetSizeStr + + "_hash_table_search_step_" + + to_string(target) + ".txt"; + ofstream outFile(outFilename); + if (!outFile.is_open()) { + cerr << "Error opening output file: " << outFilename << endl; + return; + } + + cout << "\n--- Searching for target: " << target << " ---\n"; + bool found = ht.searchWithSteps(targetKey, outFile); + if (found) { + cout << "Result: FOUND (" << target << " = " << target << ")\n"; + else { + cout << "Result: NOT FOUND (" << target << " != -1)\n"; + } + outFile.close(); + +} From 20bcd0ae92ad5ee9e3e9b63ec5490d746c6a9079 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Sat, 13 Jun 2026 01:11:38 +0800 Subject: [PATCH 05/15] Implement main function for hash table search and add error handling for dataset loading --- .vscode/settings.json | 3 +++ hash_table_search_step.cpp | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..70e34ec --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "C_Cpp.errorSquiggles": "disabled" +} \ No newline at end of file diff --git a/hash_table_search_step.cpp b/hash_table_search_step.cpp index 6265204..c92fd12 100644 --- a/hash_table_search_step.cpp +++ b/hash_table_search_step.cpp @@ -171,3 +171,40 @@ void runSearch(const HashTable& ht, outFile.close(); } + +int main(int argc, char* argv[]) { + if (argc < 2) { + cerr << "Usage: " << argv[0] << " \n"; + cerr << "Example: " << argv[0] << " dataset_1000.csv\n"; + return 1; + } + + string datasetFile = argv[1]; + string datasetSizeStr = extractDatasetSize(datasetFile); + + cout << "Reading dataset from: " << datasetFile << " ...\n"; + vector records = parseCSV(datasetFile); + + if (records.empty()) { + cerr << "ERROR: No records loaded. Check the file path and format.\n"; + return 1; + } + cout << "Loaded " << records.size() << " records.\n"; + + cout << "Building hash table with " << tableSize << " buckets ...\n"; + HashTable ht(tableSize); + + for (const Record& rec : records) { + ht.insert(rec); + } + cout << "Hash table built successfully.\n"; + + runSearch(ht, TARGET_FOUND, datasetSizeStr); + runSearch(ht, TARGET_NOT_FOUND, datasetSizeStr); + + cout << "\nDone.\n"; + return 0; + +} + + From 325d26675da064011b47967d98d2061d839a3639 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Sat, 13 Jun 2026 22:19:33 +0800 Subject: [PATCH 06/15] Add initial hahs table search program --- hash_table_search.cpp | 64 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 hash_table_search.cpp diff --git a/hash_table_search.cpp b/hash_table_search.cpp new file mode 100644 index 0000000..2a98503 --- /dev/null +++ b/hash_table_search.cpp @@ -0,0 +1,64 @@ +// # ********************************************************* +// Program: hash_table_search_step.cpp +// Course: CCP6214 Algorithm Design and Analysis +// Lecture Class: TC2L +// Tutorial Class: TT5L +// Trimester: 2610 +// Member_1: Hew Wee Bo | hewweebo@gmail.com | 0128803121 +// Member_2: ID | NAME | EMAIL | PHONE +// Member_3: ID | JEVAANRAJ A/L RAJA KUMARAN | jevaanraj17@gmail.com | 0179651973 +// Member_4: ID | NAME | EMAIL | PHONE +// # ********************************************************* +// Task Distribution +// Member_1: Hew Wee Bo +// Member_2: +// Member_3: Jevaanraj +// Member_4: +// # ********************************************************* + +/* Purpose + Measure the running time of hash table search for: + - Best Case + - Average Case + - Worst Case */ + + #include + #include + #include + #include + #include + #include + #include + + using namespace std; + using namespace chrono; + + struct Record { + Record data; + Node* next; + }; + + class HashTable { + private: + int tableSize; + int numElements; + vector table; + public: + HashTable(int size) + :tableSize(size). numElements(0), table(size, nullptr) {} + + ~HashTable(){ + for (int i=0; i < tableSize; i++) { + Node* curr= table[i]; + while (curr != nullptr) { + Node* temp = curr; + curr= curr->next; + delete temp; + } + } + } + + + + + } \ No newline at end of file From 933efddce2752bcd21280bcdf7a54682eb5f5501 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Sat, 13 Jun 2026 22:47:17 +0800 Subject: [PATCH 07/15] Implement hash table search methods --- hash_table_search.cpp | 77 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/hash_table_search.cpp b/hash_table_search.cpp index 2a98503..cd80c0c 100644 --- a/hash_table_search.cpp +++ b/hash_table_search.cpp @@ -58,7 +58,82 @@ } } + int hashFunction(long long key) const{ + return (int)((unsigned long long)key % (unsigned long long)tableSize); + } + void insert(const Record& rec) { + int idx = hashFunction(rec.key); + Node* newNode = new Node(); + newNode->data = rec; + newNode->next = table[idx]; + table[idx] = newNode; + numElements++; + } + + Node* search(long long targetKey) const { + int idx = hashFunction(targetKey); + Node* curr = table[idx]; + while (curr != nullptr) { + if (curr->data.key == targetKey) { + return curr; + } + curr = curr->next; + } + return nullptr; + } + + long long getBestCaseKey() cosnt { + for (int i = 0; i < tableSize; ++i) { + if (table[i] != nullptr) { + return table[i]->data.key; + } + } + return LLONG_MIN; + } + + long long getLongestChainKey() const{ + int maxLen=0; + long long worstKey = LLONG_MIN; + + for (int i = 0; i < tableSize; i++){ + if (table[i]) == nullptr) continue; + + int len = 0; + Node* curr = table[i]; + Node* last = nullptr; + + while (curr != nullptr) { + len++; + last = curr; + curr = curr->next; + } + + if (len > maxLen) { + maxLen = len; + worstKey = last->data.key; + } + + } + return worstKey; + } + + vector getAllKeys() const { + vector keys; + for (int i = 0; i < tableSize; ++i) { + Node* curr = table[i]; + while (curr != nullptr) { + keys.push_back(curr->data.key); + curr = curr->next; + } + } + return keys; + } + int getNumElements() const { + return numElements; + int getTableSize() const { + return tableSize; + }; - + } \ No newline at end of file From 66d4130b9341c7b2170d7df93a3b7189588d585c Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Sat, 13 Jun 2026 23:04:42 +0800 Subject: [PATCH 08/15] Implement CSV parsing and error handling in hash table search --- hash_table_search.cpp | 59 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/hash_table_search.cpp b/hash_table_search.cpp index cd80c0c..f40a9ef 100644 --- a/hash_table_search.cpp +++ b/hash_table_search.cpp @@ -135,5 +135,62 @@ return tableSize; }; - + vector parseCSV(const string& filename) { + vector records; + ifstream inFile(filename); + + if (!inFile.is_open()) { + cerr << "Error opening file: " << filename << endl; + return records; + } + + string line; + while (getline(inFile, line)) { + if (line.empty()) continue; + + if (!line.empty() && line.back() == '\r') { + line.pop_back(); + } + + stringstream ss(line); + string keyStr, valueStr; + + if (getline(ss, keyStr, ',') && getline(ss, valueStr)) { + try { + Record rec; + rec.key = stoll(keyStr); + rec.value = valueStr; + records.push_back(rec); + }catch (...){ + cerr << "Error parsing line: " << line << endl; + } + } + } + + inFile.close(); + return records; + + string extractDatasetSize(const string& filename) { + size_t underPos = filename.rfind('_'); + size_t dotPos = filename.rfind('.'); + if (underPos != string::npos && dotPos != string::npos && underPos < dotPos) { + return filename.substr(underPos + 1, dotPos - underPos - 1); + } + return "unknown"; + } + + int choosePrimeTableSize (int minSize) { + if (minSize < 2) return 2; + int candidate = (minSize % 2 == 0) ? minSize + 1 : minSize; + while (true) { + bool isPrime = true; + for (int i=2; (long long)i * i <= candidate; i++) { + if (candidate % i == 0) { + isPrime = false; + break; + } + } + if (isPrime) return candidate; + candidate += 2; + } } \ No newline at end of file From 2abcb4b81a12ab172661cf47af33bd1f54212eda Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Sat, 13 Jun 2026 23:39:20 +0800 Subject: [PATCH 09/15] Implement main function for hash table search with dataset loading and timing analysis --- hash_table_search.cpp | 114 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 113 insertions(+), 1 deletion(-) diff --git a/hash_table_search.cpp b/hash_table_search.cpp index f40a9ef..ee970fd 100644 --- a/hash_table_search.cpp +++ b/hash_table_search.cpp @@ -193,4 +193,116 @@ if (isPrime) return candidate; candidate += 2; } - } \ No newline at end of file + + int main(int argc, char* argv[]) { + if (argc < 2) { + cerr << "Usage: " << argv[0] << " " << endl; + return 1; + } + + string filename = argv[1]; + string datasetSize = extractDatasetSize(filename); + cout << "Dataset Size: " << datasetSize << endl; + + return 1; + } + + string datasetFile = argv[1]; + string datasetSizeStr = extractDatasetSize(datasetFile); + + cout << "Reading dataset from: " << datasetFile << endl; + vector records = parseCSV(datasetFile); + + if (records.empty()) { + cerr << "No valid records found in the dataset." << endl; + return 1; + } + + int n = (int)records.size(); + cout << "Loaded " << n << " records." << endl; + + int tableSize = choosePrimeTableSize(n * 2); + cout << "Building hash table with " << tableSize << " buckets..." << endl; + + HashTable ht(tableSize); + for (const Record& rec : records) { + ht.insert(rec); + } + + cout << "Hash table built with " << ht.getNumElements() << " elements." << endl; + + long long bestCaseKey = ht.getBestCaseKey(); + long long worstCaseKey = ht.getLongestChainKey(); + vector allKeys = ht.getAllKeys(); + + cout << "Best case key: " << bestCaseKey << endl; + cout << "Worst case key: " << worstCaseKey << endl; + cout << "Average case key: " << allKeys[allKeys.size() / 2] << endl; + + cout << "TIming best case (" << n << " searches)..." << endl; + auto bcStart = high_resolution_clock::now(); + + volatile int bestFound = 0; + for (int i = 0; i < n; i++) { + Node* result = ht.search(bestCaseKey); + if (result != nullptr) bestFound++; + } + + auto bcEnd = high_resolution_clock::now(); + duration bestTime = duration_cast>(bcEnd - bcStart); + + + cout << "Timing average case (" << n << " searches)..." << endl; + auto acStart = high_resolution_clock::now(); + + volatile int avgFound = 0; + for (int i = 0; i < n; i++) { + Node* result = ht.search(allKeys[i]); + if (result != nullptr) avgFound++; + } + + auto acEnd = high_resolution_clock::now(); + duration avgTime = duration_cast>(acEnd - acStart); + + cout << "Timing worst case (" << n << " searches)..." << endl; + auto wcStart = high_resolution_clock::now(); + + volatile int worstFound = 0; + for (int i = 0; i < n; i++) { + Node* result = ht.search(worstCaseKey); + if (result != nullptr) worstFound++; + } + + auto wcEnd = high_resolution_clock::now(); + duration worstTime = duration_cast>(wcEnd - wcStart); + + string outFilename = "hash_table_search_dataset_" + datasetSizeStr + ".txt"; + ofstream outFile(outFilename); + + if (!outFile.is_open()) { + cerr << "ERROR: Cannot create output file: " << outFilename << "\n"; + return 1; + } + + cout << "\nResults for dataset size " << datasetSizeStr << ":\n"; + cout << "Best case: " << bestFound << "/" << n << " found, time = " << bestTime.count() << " seconds\n"; + cout << "Average case: " << avgFound << "/" << n << " found, time = " << avgTime.count() << " seconds\n"; + cout << "Worst case: " << worstFound << "/" << n << " found, time = " << worstTime.count() << " seconds\n"; + + outFile.close(); + + cout << "\n=========================================\n"; + cout << "Results for dataset size n = " << n << "\n"; + cout << "=========================================\n"; + cout << "Best case time: " << bestTime.count() << " seconds\n"; + cout << "Average case time: " << avgTime.count() << " seconds\n"; + cout << "Worst case time: " << worstTime.count() << " seconds\n"; + cout << "=========================================\n"; + cout << "Output written to: " << outFilename << "\n"; + + return 0; + } + + + + From 24134b151dbbb3048f6be304775e903fbdba1453 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Tue, 16 Jun 2026 12:41:02 +0800 Subject: [PATCH 10/15] Add prime table size selection and improve search output formatting --- .vscode/settings.json | 3 --- hash_table_search_step.cpp | 31 ++++++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 8 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 70e34ec..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "C_Cpp.errorSquiggles": "disabled" -} \ No newline at end of file diff --git a/hash_table_search_step.cpp b/hash_table_search_step.cpp index c92fd12..0b56b2d 100644 --- a/hash_table_search_step.cpp +++ b/hash_table_search_step.cpp @@ -29,6 +29,23 @@ #include using namespace std; +static int choosePrimeTableSize(int minSize) { + if (minSize < 2) return 2; + + int candidate = (minSize % 2 == 0) ? minSize + 1 : minSize; + while (true) { + bool isPrime = true; + for (int i = 2; (long long)i * i <= candidate; ++i) { + if (candidate % i == 0) { + isPrime = false; + break; + } + } + if (isPrime) return candidate; + candidate += 2; + } +} + struct Record { long long key; // 10-digit unique int @@ -154,19 +171,20 @@ void runSearch(const HashTable& ht, const string& datasetSizeStr) { string outFilename = "dataset_" + datasetSizeStr + "_hash_table_search_step_" - + to_string(target) + ".txt"; + + to_string(targetKey) + ".txt"; ofstream outFile(outFilename); if (!outFile.is_open()) { cerr << "Error opening output file: " << outFilename << endl; return; } - cout << "\n--- Searching for target: " << target << " ---\n"; + cout << "\n--- Searching for target: " << targetKey << " ---\n"; bool found = ht.searchWithSteps(targetKey, outFile); if (found) { - cout << "Result: FOUND (" << target << " = " << target << ")\n"; + cout << "Result: FOUND (" << targetKey << " = " << targetKey << ")\n"; + } else { - cout << "Result: NOT FOUND (" << target << " != -1)\n"; + cout << "Result: NOT FOUND (" << targetKey << " != -1)\n"; } outFile.close(); @@ -191,6 +209,7 @@ int main(int argc, char* argv[]) { } cout << "Loaded " << records.size() << " records.\n"; + int tableSize = choosePrimeTableSize((int)records.size() * 2); cout << "Building hash table with " << tableSize << " buckets ...\n"; HashTable ht(tableSize); @@ -199,6 +218,9 @@ int main(int argc, char* argv[]) { } cout << "Hash table built successfully.\n"; + long long TARGET_FOUND = records.front().key; + long long TARGET_NOT_FOUND = -1; + runSearch(ht, TARGET_FOUND, datasetSizeStr); runSearch(ht, TARGET_NOT_FOUND, datasetSizeStr); @@ -207,4 +229,3 @@ int main(int argc, char* argv[]) { } - From 705ff168dabd4841958ef10b6764d66a2feeafda Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Wed, 17 Jun 2026 12:05:12 +0800 Subject: [PATCH 11/15] Fix struct definitions, correct syntax errors, and refactor CSV parsing in hash table search --- hash_table_search.cpp | 130 +++++++++++++++++++++--------------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/hash_table_search.cpp b/hash_table_search.cpp index ee970fd..7526567 100644 --- a/hash_table_search.cpp +++ b/hash_table_search.cpp @@ -34,8 +34,13 @@ using namespace chrono; struct Record { - Record data; - Node* next; + long long key; + string value; + }; + + struct Node { + Record data; + Node* next; }; class HashTable { @@ -45,7 +50,7 @@ vector table; public: HashTable(int size) - :tableSize(size). numElements(0), table(size, nullptr) {} + :tableSize(size), numElements(0), table(size, nullptr) {} ~HashTable(){ for (int i=0; i < tableSize; i++) { @@ -82,7 +87,7 @@ return nullptr; } - long long getBestCaseKey() cosnt { + long long getBestCaseKey() const { for (int i = 0; i < tableSize; ++i) { if (table[i] != nullptr) { return table[i]->data.key; @@ -96,7 +101,7 @@ long long worstKey = LLONG_MIN; for (int i = 0; i < tableSize; i++){ - if (table[i]) == nullptr) continue; + if (table[i] == nullptr) continue; int len = 0; Node* curr = table[i]; @@ -131,55 +136,59 @@ int getNumElements() const { return numElements; + } + int getTableSize() const { return tableSize; - }; - - vector parseCSV(const string& filename) { - vector records; - ifstream inFile(filename); - - if (!inFile.is_open()) { - cerr << "Error opening file: " << filename << endl; - return records; - } - - string line; - while (getline(inFile, line)) { - if (line.empty()) continue; - - if (!line.empty() && line.back() == '\r') { - line.pop_back(); - } - - stringstream ss(line); - string keyStr, valueStr; - - if (getline(ss, keyStr, ',') && getline(ss, valueStr)) { - try { - Record rec; - rec.key = stoll(keyStr); - rec.value = valueStr; - records.push_back(rec); - }catch (...){ - cerr << "Error parsing line: " << line << endl; - } - } - } - - inFile.close(); - return records; + } + }; - string extractDatasetSize(const string& filename) { + vector parseCSV(const string& filename) { + vector records; + ifstream inFile(filename); + + if (!inFile.is_open()) { + cerr << "Error opening file: " << filename << endl; + return records; + } + + string line; + while (getline(inFile, line)) { + if (line.empty()) continue; + + if (!line.empty() && line.back() == '\r') { + line.pop_back(); + } + + stringstream ss(line); + string keyStr, valueStr; + + if (getline(ss, keyStr, ',') && getline(ss, valueStr)) { + try { + Record rec; + rec.key = stoll(keyStr); + rec.value = valueStr; + records.push_back(rec); + }catch (...){ + cerr << "Error parsing line: " << line << endl; + } + } + } + + inFile.close(); + return records; + } + + string extractDatasetSize(const string& filename) { size_t underPos = filename.rfind('_'); size_t dotPos = filename.rfind('.'); if (underPos != string::npos && dotPos != string::npos && underPos < dotPos) { return filename.substr(underPos + 1, dotPos - underPos - 1); } return "unknown"; - } + } - int choosePrimeTableSize (int minSize) { + int choosePrimeTableSize (int minSize) { if (minSize < 2) return 2; int candidate = (minSize % 2 == 0) ? minSize + 1 : minSize; while (true) { @@ -191,23 +200,18 @@ } } if (isPrime) return candidate; - candidate += 2; - } - - int main(int argc, char* argv[]) { - if (argc < 2) { - cerr << "Usage: " << argv[0] << " " << endl; - return 1; - } - - string filename = argv[1]; - string datasetSize = extractDatasetSize(filename); - cout << "Dataset Size: " << datasetSize << endl; + candidate += 2; + } + return -1; + } - return 1; - } + int main(int argc, char* argv[]) { + if (argc < 2) { + cerr << "Usage: " << argv[0] << " " << endl; + return 1; + } - string datasetFile = argv[1]; + string datasetFile = argv[1]; string datasetSizeStr = extractDatasetSize(datasetFile); cout << "Reading dataset from: " << datasetFile << endl; @@ -300,9 +304,5 @@ cout << "=========================================\n"; cout << "Output written to: " << outFilename << "\n"; - return 0; - } - - - - + return 0; + } From b9d63082e357f527d605130f162e180b550dd415 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Wed, 17 Jun 2026 12:16:00 +0800 Subject: [PATCH 12/15] Refactor output handling in main function to write results to file instead of console --- hash_table_search.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/hash_table_search.cpp b/hash_table_search.cpp index 7526567..808f94b 100644 --- a/hash_table_search.cpp +++ b/hash_table_search.cpp @@ -288,21 +288,23 @@ return 1; } - cout << "\nResults for dataset size " << datasetSizeStr << ":\n"; - cout << "Best case: " << bestFound << "/" << n << " found, time = " << bestTime.count() << " seconds\n"; - cout << "Average case: " << avgFound << "/" << n << " found, time = " << avgTime.count() << " seconds\n"; - cout << "Worst case: " << worstFound << "/" << n << " found, time = " << worstTime.count() << " seconds\n"; + outFile << "\nResults for dataset size " << datasetSizeStr << ":\n"; + outFile << "Best case: " << bestFound << "/" << n << " found, time = " << bestTime.count() << " seconds\n"; + outFile << "Average case: " << avgFound << "/" << n << " found, time = " << avgTime.count() << " seconds\n"; + outFile << "Worst case: " << worstFound << "/" << n << " found, time = " << worstTime.count() << " seconds\n"; + + outFile << "\n=========================================\n"; + outFile << "Results for dataset size n = " << n << "\n"; + outFile << "=========================================\n"; + outFile << "Best case time: " << bestTime.count() << " seconds\n"; + outFile << "Average case time: " << avgTime.count() << " seconds\n"; + outFile << "Worst case time: " << worstTime.count() << " seconds\n"; + outFile << "=========================================\n"; + outFile << "Output written to: " << outFilename << "\n"; outFile.close(); - cout << "\n=========================================\n"; - cout << "Results for dataset size n = " << n << "\n"; - cout << "=========================================\n"; - cout << "Best case time: " << bestTime.count() << " seconds\n"; - cout << "Average case time: " << avgTime.count() << " seconds\n"; - cout << "Worst case time: " << worstTime.count() << " seconds\n"; - cout << "=========================================\n"; - cout << "Output written to: " << outFilename << "\n"; + cout << "Results written to file: " << outFilename << "\n"; return 0; } From 8fcc2c39fc62bc0bafb7dc03e2c70688977ffe71 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Fri, 19 Jun 2026 23:30:37 +0800 Subject: [PATCH 13/15] Refactor CSV parsing and output handling to log errors and results to a report file --- hash_table_search_step.cpp | 59 +++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/hash_table_search_step.cpp b/hash_table_search_step.cpp index 0b56b2d..49ed8f5 100644 --- a/hash_table_search_step.cpp +++ b/hash_table_search_step.cpp @@ -57,6 +57,10 @@ struct Node { Node* next; }; +static string makeReportFilename(const string& datasetSizeStr) { + return "dataset_" + datasetSizeStr + "_hash_table_search_step.txt"; +} + class HashTable { private: int tableSize; @@ -120,12 +124,14 @@ class HashTable { } }; -vector parseCSV(const string& filename) { +vector parseCSV(const string& filename, ostream* report = nullptr) { vector records; ifstream inFile(filename); if (!inFile.is_open()) { - cerr << "Error opening file: " << filename << endl; + if (report) { + *report << "Error opening file: " << filename << "\n"; + } return records; } @@ -147,7 +153,9 @@ while (getline(inFile, line)) { rec.value = valueStr; records.push_back(rec); }catch (...){ - cerr << "Error parsing line: " << line << endl; + if (report) { + *report << "Error parsing line: " << line << "\n"; + } } } @@ -168,63 +176,56 @@ string extractDatasetSize(const string& filename) { void runSearch(const HashTable& ht, long long targetKey, - const string& datasetSizeStr) { - string outFilename = "dataset_" + datasetSizeStr - + "_hash_table_search_step_" - + to_string(targetKey) + ".txt"; - ofstream outFile(outFilename); - if (!outFile.is_open()) { - cerr << "Error opening output file: " << outFilename << endl; - return; - } - - cout << "\n--- Searching for target: " << targetKey << " ---\n"; + ofstream& outFile) { + outFile << "\n--- Searching for target: " << targetKey << " ---\n"; bool found = ht.searchWithSteps(targetKey, outFile); if (found) { - cout << "Result: FOUND (" << targetKey << " = " << targetKey << ")\n"; + outFile << "Result: FOUND (" << targetKey << " = " << targetKey << ")\n"; } else { - cout << "Result: NOT FOUND (" << targetKey << " != -1)\n"; + outFile << "Result: NOT FOUND (" << targetKey << " != -1)\n"; } - outFile.close(); - } int main(int argc, char* argv[]) { if (argc < 2) { - cerr << "Usage: " << argv[0] << " \n"; - cerr << "Example: " << argv[0] << " dataset_1000.csv\n"; return 1; } string datasetFile = argv[1]; string datasetSizeStr = extractDatasetSize(datasetFile); + string reportFilename = makeReportFilename(datasetSizeStr); + ofstream reportFile(reportFilename); + + if (!reportFile.is_open()) { + return 1; + } - cout << "Reading dataset from: " << datasetFile << " ...\n"; - vector records = parseCSV(datasetFile); + reportFile << "Reading dataset from: " << datasetFile << " ...\n"; + vector records = parseCSV(datasetFile, &reportFile); if (records.empty()) { - cerr << "ERROR: No records loaded. Check the file path and format.\n"; + reportFile << "ERROR: No records loaded. Check the file path and format.\n"; return 1; } - cout << "Loaded " << records.size() << " records.\n"; + reportFile << "Loaded " << records.size() << " records.\n"; int tableSize = choosePrimeTableSize((int)records.size() * 2); - cout << "Building hash table with " << tableSize << " buckets ...\n"; + reportFile << "Building hash table with " << tableSize << " buckets ...\n"; HashTable ht(tableSize); for (const Record& rec : records) { ht.insert(rec); } - cout << "Hash table built successfully.\n"; + reportFile << "Hash table built successfully.\n"; long long TARGET_FOUND = records.front().key; long long TARGET_NOT_FOUND = -1; - runSearch(ht, TARGET_FOUND, datasetSizeStr); - runSearch(ht, TARGET_NOT_FOUND, datasetSizeStr); + runSearch(ht, TARGET_FOUND, reportFile); + runSearch(ht, TARGET_NOT_FOUND, reportFile); - cout << "\nDone.\n"; + reportFile << "\nDone.\n"; return 0; } From 7714068d4e4dfd88bbacf113991f8b8c0c93cf81 Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Sat, 20 Jun 2026 00:17:25 +0800 Subject: [PATCH 14/15] Refactor output handling to write logs to a file instead of console --- hash_table_search.cpp | 46 ++++++++++++++++++++------------------ hash_table_search_step.cpp | 25 ++++++++++++--------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/hash_table_search.cpp b/hash_table_search.cpp index 808f94b..04c78c4 100644 --- a/hash_table_search.cpp +++ b/hash_table_search.cpp @@ -223,27 +223,27 @@ } int n = (int)records.size(); - cout << "Loaded " << n << " records." << endl; + // cout << "Loaded " << n << " records." << endl; int tableSize = choosePrimeTableSize(n * 2); - cout << "Building hash table with " << tableSize << " buckets..." << endl; + // cout << "Building hash table with " << tableSize << " buckets..." << endl; HashTable ht(tableSize); for (const Record& rec : records) { ht.insert(rec); } - cout << "Hash table built with " << ht.getNumElements() << " elements." << endl; + // cout << "Hash table built with " << ht.getNumElements() << " elements." << endl; long long bestCaseKey = ht.getBestCaseKey(); long long worstCaseKey = ht.getLongestChainKey(); vector allKeys = ht.getAllKeys(); - cout << "Best case key: " << bestCaseKey << endl; - cout << "Worst case key: " << worstCaseKey << endl; - cout << "Average case key: " << allKeys[allKeys.size() / 2] << endl; + // cout << "Best case key: " << bestCaseKey << endl; + //cout << "Worst case key: " << worstCaseKey << endl; + // cout << "Average case key: " << allKeys[allKeys.size() / 2] << endl; - cout << "TIming best case (" << n << " searches)..." << endl; + // cout << "TIming best case (" << n << " searches)..." << endl; auto bcStart = high_resolution_clock::now(); volatile int bestFound = 0; @@ -253,10 +253,10 @@ } auto bcEnd = high_resolution_clock::now(); - duration bestTime = duration_cast>(bcEnd - bcStart); + duration bestTime = duration_cast>(bcEnd - bcStart); - cout << "Timing average case (" << n << " searches)..." << endl; + // cout << "Timing average case (" << n << " searches)..." << endl; auto acStart = high_resolution_clock::now(); volatile int avgFound = 0; @@ -266,9 +266,9 @@ } auto acEnd = high_resolution_clock::now(); - duration avgTime = duration_cast>(acEnd - acStart); + duration avgTime = duration_cast>(acEnd - acStart); - cout << "Timing worst case (" << n << " searches)..." << endl; + // cout << "Timing worst case (" << n << " searches)..." << endl; auto wcStart = high_resolution_clock::now(); volatile int worstFound = 0; @@ -278,7 +278,7 @@ } auto wcEnd = high_resolution_clock::now(); - duration worstTime = duration_cast>(wcEnd - wcStart); + duration worstTime = duration_cast>(wcEnd - wcStart); string outFilename = "hash_table_search_dataset_" + datasetSizeStr + ".txt"; ofstream outFile(outFilename); @@ -289,17 +289,19 @@ } outFile << "\nResults for dataset size " << datasetSizeStr << ":\n"; - outFile << "Best case: " << bestFound << "/" << n << " found, time = " << bestTime.count() << " seconds\n"; - outFile << "Average case: " << avgFound << "/" << n << " found, time = " << avgTime.count() << " seconds\n"; - outFile << "Worst case: " << worstFound << "/" << n << " found, time = " << worstTime.count() << " seconds\n"; - - outFile << "\n=========================================\n"; + outFile << "Best case: " << bestFound << "/" << n << " found\n Time = " << bestTime.count() << " milliseconds\n"; + outFile << endl; + outFile << "Average case: " << avgFound << "/" << n << " found\n Time = " << avgTime.count() << " milliseconds\n"; + outFile << endl; + outFile << "Worst case: " << worstFound << "/" << n << " found\n Time = " << worstTime.count() << " milliseconds\n"; + outFile << endl; + outFile << "=======================================================\n"; + outFile << endl; outFile << "Results for dataset size n = " << n << "\n"; - outFile << "=========================================\n"; - outFile << "Best case time: " << bestTime.count() << " seconds\n"; - outFile << "Average case time: " << avgTime.count() << " seconds\n"; - outFile << "Worst case time: " << worstTime.count() << " seconds\n"; - outFile << "=========================================\n"; + outFile << "Best case time: " << bestTime.count() << " milliseconds\n"; + outFile << "Average case time: " << avgTime.count() << " milliseconds\n"; + outFile << "Worst case time: " << worstTime.count() << " milliseconds\n"; + outFile << endl; outFile << "Output written to: " << outFilename << "\n"; outFile.close(); diff --git a/hash_table_search_step.cpp b/hash_table_search_step.cpp index 49ed8f5..54f16de 100644 --- a/hash_table_search_step.cpp +++ b/hash_table_search_step.cpp @@ -195,37 +195,40 @@ int main(int argc, char* argv[]) { string datasetFile = argv[1]; string datasetSizeStr = extractDatasetSize(datasetFile); string reportFilename = makeReportFilename(datasetSizeStr); - ofstream reportFile(reportFilename); + ofstream outFile(reportFilename); - if (!reportFile.is_open()) { + if (!outFile.is_open()) { return 1; } - reportFile << "Reading dataset from: " << datasetFile << " ...\n"; - vector records = parseCSV(datasetFile, &reportFile); + outFile << "Reading dataset from: " << datasetFile << " ...\n"; + cout << "Reading dataset from: " << datasetFile << endl; + vector records = parseCSV(datasetFile, &outFile); if (records.empty()) { - reportFile << "ERROR: No records loaded. Check the file path and format.\n"; + outFile << "ERROR: No records loaded. Check the file path and format.\n"; return 1; } - reportFile << "Loaded " << records.size() << " records.\n"; + outFile << "Loaded " << records.size() << " records.\n"; int tableSize = choosePrimeTableSize((int)records.size() * 2); - reportFile << "Building hash table with " << tableSize << " buckets ...\n"; + outFile << "Building hash table with " << tableSize << " buckets ...\n"; HashTable ht(tableSize); for (const Record& rec : records) { ht.insert(rec); } - reportFile << "Hash table built successfully.\n"; + outFile << "Hash table built successfully.\n"; long long TARGET_FOUND = records.front().key; long long TARGET_NOT_FOUND = -1; - runSearch(ht, TARGET_FOUND, reportFile); - runSearch(ht, TARGET_NOT_FOUND, reportFile); + runSearch(ht, TARGET_FOUND, outFile); + runSearch(ht, TARGET_NOT_FOUND, outFile); + + outFile << "\nDone.\n"; + cout << "Results written to file: " << reportFilename << "\n"; - reportFile << "\nDone.\n"; return 0; } From 520fdc051e2fe5bfa4858c71fe10f30b191a551a Mon Sep 17 00:00:00 2001 From: sapuplanta Date: Sat, 20 Jun 2026 00:22:13 +0800 Subject: [PATCH 15/15] Add build instructions for hash table search and its step implementation --- readme.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/readme.md b/readme.md index 0adfeb0..568b04c 100644 --- a/readme.md +++ b/readme.md @@ -4,6 +4,8 @@ g++ -o dataset_generator dataset_generator.cpp g++ -o radix_sort radix_sort.cpp -lm g++ -o radix_sort_step radix_sort_step.cpp -lm +g++ hash_table_search_step.cpp -o hash_table_search_step +g++ hash_table_search.cpp -o hash_table_search ``` ## Generate test data @@ -31,3 +33,16 @@ Performs LSD radix sort on each file, outputs `radix_sorted_dataset_*.csv` with ``` Outputs step-by-step trace of radix sort on rows 1-7. Generates `dataset_1000_radix_sorted_step_1_7.txt` showing array state after each digit pass (d=10 to d=1). + + +## Hash Table Search Step + +```sh +./hash_table_search_step dataset_1000.csv +``` + +## Hash Table Search + +```sh +./hash_table_search dataset_1000.csv +``` \ No newline at end of file