diff --git a/hash_table_search.cpp b/hash_table_search.cpp
new file mode 100644
index 0000000..04c78c4
--- /dev/null
+++ b/hash_table_search.cpp
@@ -0,0 +1,313 @@
+// # *********************************************************
+// Program: hash_table_search.cpp
+// Course: CCP6214 Algorithm Design and Analysis
+// Lecture Class: TC2L
+// Tutorial Class: TT5L
+// Trimester: 2610
+// Member_1: Hew Wee Bo | hewweebo@gmail.com | 0128803121
+// Member_2: ID | NAME | EMAIL | PHONE
+// Member_3: ID | JEVAANRAJ A/L RAJA KUMARAN | jevaanraj17@gmail.com | 0179651973
+// Member_4: ID | NAME | EMAIL | PHONE
+// # *********************************************************
+// Task Distribution
+// Member_1: Hew Wee Bo
+// Member_2:
+// Member_3: Jevaanraj
+// Member_4:
+// # *********************************************************
+
+/* Purpose
+   Measure the running time of hash table search for:
+   - Best Case
+   - Average Case
+   - Worst Case */
+
+#include <chrono>
+#include <climits>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+using namespace std;
+using namespace chrono;
+
+// One key/value row read from the dataset CSV.
+struct Record {
+    long long key;
+    string value;
+};
+
+// A link in a bucket's chain.
+struct Node {
+    Record data;
+    Node* next;
+};
+
+// Hash table using separate chaining: every bucket heads a singly linked
+// list of Node, and new records are pushed at the head of their bucket.
+class HashTable {
+private:
+    int tableSize;
+    int numElements;
+    vector<Node*> table;
+
+public:
+    // Creates `size` empty buckets. `size` must be positive because
+    // hashFunction() takes the key modulo the bucket count.
+    HashTable(int size)
+        : tableSize(size), numElements(0), table(size, nullptr) {}
+
+    // The nodes are owned through raw pointers, so a copied table would
+    // delete every node a second time. Copying is therefore disabled.
+    HashTable(const HashTable&) = delete;
+    HashTable& operator=(const HashTable&) = delete;
+
+    // Frees every node of every chain.
+    ~HashTable() {
+        for (Node* curr : table) {
+            while (curr != nullptr) {
+                Node* next = curr->next;
+                delete curr;
+                curr = next;
+            }
+        }
+    }
+
+    // Maps a key to a bucket index in [0, tableSize). The key is
+    // reinterpreted as unsigned so negative keys give a valid index.
+    int hashFunction(long long key) const {
+        return (int)((unsigned long long)key % (unsigned long long)tableSize);
+    }
+
+    // Inserts a copy of `rec` at the head of its bucket. Duplicate keys
+    // are not detected.
+    void insert(const Record& rec) {
+        int idx = hashFunction(rec.key);
+        table[idx] = new Node{rec, table[idx]};
+        numElements++;
+    }
+
+    // Returns the node holding `targetKey`, or nullptr when it is absent.
+    Node* search(long long targetKey) const {
+        for (Node* curr = table[hashFunction(targetKey)]; curr != nullptr; curr = curr->next) {
+            if (curr->data.key == targetKey) {
+                return curr;
+            }
+        }
+        return nullptr;
+    }
+
+    // Returns the key at the head of the first non-empty bucket (found
+    // after a single comparison), or LLONG_MIN when the table is empty.
+    long long getBestCaseKey() const {
+        for (Node* head : table) {
+            if (head != nullptr) {
+                return head->data.key;
+            }
+        }
+        return LLONG_MIN;
+    }
+
+    // Returns the key of the last node in the longest chain (the first
+    // such chain on ties), or LLONG_MIN when the table is empty.
+    long long getLongestChainKey() const {
+        int maxLen = 0;
+        long long worstKey = LLONG_MIN;
+
+        for (Node* head : table) {
+            int len = 0;
+            const Node* last = nullptr;
+            for (const Node* curr = head; curr != nullptr; curr = curr->next) {
+                len++;
+                last = curr;
+            }
+
+            // Strict '>' keeps the first chain when lengths tie; an empty
+            // bucket has len 0 and can never win.
+            if (len > maxLen) {
+                maxLen = len;
+                worstKey = last->data.key;
+            }
+        }
+        return worstKey;
+    }
+
+    // Returns every stored key, bucket by bucket, head to tail.
+    vector<long long> getAllKeys() const {
+        vector<long long> keys;
+        keys.reserve(numElements);
+        for (Node* head : table) {
+            for (const Node* curr = head; curr != nullptr; curr = curr->next) {
+                keys.push_back(curr->data.key);
+            }
+        }
+        return keys;
+    }
+
+    int getNumElements() const {
+        return numElements;
+    }
+
+    int getTableSize() const {
+        return tableSize;
+    }
+};
+
+// Reads "key,value" rows from `filename`. Blank lines are skipped and a
+// trailing '\r' (Windows line ending) is removed. Rows whose key stoll
+// cannot convert are reported on stderr and skipped. Returns an empty
+// vector when the file cannot be opened.
+vector<Record> parseCSV(const string& filename) {
+    vector<Record> records;
+    ifstream inFile(filename);
+
+    if (!inFile.is_open()) {
+        cerr << "Error opening file: " << filename << endl;
+        return records;
+    }
+
+    string line;
+    while (getline(inFile, line)) {
+        if (line.empty()) continue;
+        if (line.back() == '\r') {
+            line.pop_back();
+        }
+
+        stringstream ss(line);
+        string keyStr, valueStr;
+        if (!getline(ss, keyStr, ',') || !getline(ss, valueStr)) continue;
+
+        try {
+            records.push_back(Record{stoll(keyStr), valueStr});
+        } catch (const exception&) {
+            // stoll throws invalid_argument / out_of_range for bad keys.
+            cerr << "Error parsing line: " << line << endl;
+        }
+    }
+    return records;
+}
+
+// Returns the text between the last '_' and the last '.' of the file name
+// (e.g. "1000" for "data/dataset_1000.csv"), or "unknown" when the name does
+// not have that shape. Only the part after the last path separator is
+// inspected, so '_' or '.' in a directory name cannot affect the result.
+string extractDatasetSize(const string& filename) {
+    const size_t slashPos = filename.find_last_of("/\\");
+    const string base = (slashPos == string::npos) ? filename : filename.substr(slashPos + 1);
+
+    const size_t underPos = base.rfind('_');
+    const size_t dotPos = base.rfind('.');
+    if (underPos != string::npos && dotPos != string::npos && underPos < dotPos) {
+        return base.substr(underPos + 1, dotPos - underPos - 1);
+    }
+    return "unknown";
+}
+
+// Returns the smallest odd prime that is >= minSize, or 2 when minSize < 2.
+int choosePrimeTableSize(int minSize) {
+    if (minSize < 2) return 2;
+
+    // Even numbers above 2 are never prime, so only odd candidates are tried.
+    int candidate = (minSize % 2 == 0) ? minSize + 1 : minSize;
+    while (true) {
+        bool isPrime = true;
+        // Trial division; the long long product avoids overflow in i * i.
+        for (int i = 3; (long long)i * i <= candidate; i += 2) {
+            if (candidate % i == 0) {
+                isPrime = false;
+                break;
+            }
+        }
+        if (isPrime) return candidate;
+        candidate += 2;
+    }
+}
+
+// Performs `count` lookups, using keyAt(i) as the i-th key, and returns the
+// elapsed time in milliseconds. `found` receives the number of hits.
+template <typename KeyAt>
+static double timeSearches(const HashTable& ht, int count, KeyAt keyAt, int& found) {
+    // volatile stops the compiler from optimising the lookups away.
+    volatile int hits = 0;
+
+    const auto start = high_resolution_clock::now();
+    for (int i = 0; i < count; i++) {
+        if (ht.search(keyAt(i)) != nullptr) {
+            hits = hits + 1;
+        }
+    }
+    const auto stop = high_resolution_clock::now();
+
+    found = hits;
+    return duration<double, milli>(stop - start).count();
+}
+
+int main(int argc, char* argv[]) {
+    if (argc < 2) {
+        cerr << "Usage: " << argv[0] << " <dataset_file.csv>" << endl;
+        return 1;
+    }
+
+    string datasetFile = argv[1];
+    string datasetSizeStr = extractDatasetSize(datasetFile);
+
+    cout << "Reading dataset from: " << datasetFile << endl;
+    vector<Record> records = parseCSV(datasetFile);
+
+    if (records.empty()) {
+        cerr << "No valid records found in the dataset." << endl;
+        return 1;
+    }
+
+    int n = (int)records.size();
+    // At least 2n buckets, i.e. a load factor of at most 0.5.
+    HashTable ht(choosePrimeTableSize(n * 2));
+    for (const Record& rec : records) {
+        ht.insert(rec);
+    }
+
+    long long bestCaseKey = ht.getBestCaseKey();
+    long long worstCaseKey = ht.getLongestChainKey();
+    vector<long long> allKeys = ht.getAllKeys();
+
+    // Each case performs n lookups so the three timings are comparable.
+    int bestFound = 0;
+    int avgFound = 0;
+    int worstFound = 0;
+    double bestMs = timeSearches(ht, n, [&](int) { return bestCaseKey; }, bestFound);
+    double avgMs = timeSearches(ht, n, [&](int i) { return allKeys[i]; }, avgFound);
+    double worstMs = timeSearches(ht, n, [&](int) { return worstCaseKey; }, worstFound);
+
+    string outFilename = "hash_table_search_dataset_" + datasetSizeStr + ".txt";
+    ofstream outFile(outFilename);
+
+    if (!outFile.is_open()) {
+        cerr << "ERROR: Cannot create output file: " << outFilename << "\n";
+        return 1;
+    }
+
+    outFile << "\nResults for dataset size " << datasetSizeStr << ":\n";
+    outFile << "Best case: " << bestFound << "/" << n << " found\n Time = " << bestMs << " milliseconds\n";
+    outFile << endl;
+    outFile << "Average case: " << avgFound << "/" << n << " found\n Time = " << avgMs << " milliseconds\n";
+    outFile << endl;
+    outFile << "Worst case: " << worstFound << "/" << n << " found\n Time = " << worstMs << " milliseconds\n";
+    outFile << endl;
+    outFile << "=======================================================\n";
+    outFile << endl;
+    outFile << "Results for dataset size n = " << n << "\n";
+    outFile << "Best case time: " << bestMs << " milliseconds\n";
+    outFile << "Average case time: " << avgMs << " milliseconds\n";
+    outFile << "Worst case time: " << worstMs << " milliseconds\n";
+    outFile << endl;
+    outFile << "Output written to: " << outFilename << "\n";
+
+    outFile.close();
+
+    cout << "Results written to file: " << outFilename << "\n";
+
+    return 0;
+}
diff --git a/hash_table_search_step.cpp
b/hash_table_search_step.cpp
new file mode 100644
index 0000000..54f16de
--- /dev/null
+++ b/hash_table_search_step.cpp
@@ -0,0 +1,259 @@
+// # *********************************************************
+// Program: hash_table_search_step.cpp
+// Course: CCP6214 Algorithm Design and Analysis
+// Lecture Class: TC2L
+// Tutorial Class: TT5L
+// Trimester: 2610
+// Member_1: Hew Wee Bo | hewweebo@gmail.com | 0128803121
+// Member_2: ID | NAME | EMAIL | PHONE
+// Member_3: ID | JEVAANRAJ A/L RAJA KUMARAN | jevaanraj17@gmail.com | 0179651973
+// Member_4: ID | NAME | EMAIL | PHONE
+// # *********************************************************
+// Task Distribution
+// Member_1: Hew Wee Bo
+// Member_2:
+// Member_3: Jevaanraj
+// Member_4:
+// # *********************************************************
+
+/* Purpose:
+   Reads a dataset CSV, inserts all records into a hash table
+   using separate chaining (linked list), then searches for
+   a specified target key and logs every step of the search process
+   to an output file */
+
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+using namespace std;
+
+// Returns the smallest odd prime that is >= minSize, or 2 when minSize < 2.
+static int choosePrimeTableSize(int minSize) {
+    if (minSize < 2) return 2;
+
+    // Even numbers above 2 are never prime, so only odd candidates are tried.
+    int candidate = (minSize % 2 == 0) ? minSize + 1 : minSize;
+    while (true) {
+        bool isPrime = true;
+        // Trial division; the long long product avoids overflow in i * i.
+        for (int i = 3; (long long)i * i <= candidate; i += 2) {
+            if (candidate % i == 0) {
+                isPrime = false;
+                break;
+            }
+        }
+        if (isPrime) return candidate;
+        candidate += 2;
+    }
+}
+
+// One key/value row read from the dataset CSV.
+struct Record {
+    long long key;  // 10-digit unique int
+    string value;   // 5-letter lowercase string
+};
+
+// A link in a bucket's chain.
+struct Node {
+    Record data;
+    Node* next;
+};
+
+// Builds the report file name for a dataset size label, e.g. "1000" gives
+// "dataset_1000_hash_table_search_step.txt".
+static string makeReportFilename(const string& datasetSizeStr) {
+    return "dataset_" + datasetSizeStr + "_hash_table_search_step.txt";
+}
+
+// Hash table using separate chaining: every bucket heads a singly linked
+// list of Node, and new records are pushed at the head of their bucket.
+class HashTable {
+private:
+    int tableSize;
+    vector<Node*> table;
+
+public:
+    // Creates `size` empty buckets. `size` must be positive because
+    // hashFunction() takes the key modulo the bucket count.
+    HashTable(int size) : tableSize(size), table(size, nullptr) {}
+
+    // The nodes are owned through raw pointers, so a copied table would
+    // delete every node a second time. Copying is therefore disabled.
+    HashTable(const HashTable&) = delete;
+    HashTable& operator=(const HashTable&) = delete;
+
+    // Frees every node of every chain.
+    ~HashTable() {
+        for (Node* current : table) {
+            while (current != nullptr) {
+                Node* next = current->next;
+                delete current;
+                current = next;
+            }
+        }
+    }
+
+    // Maps a key to a bucket index in [0, tableSize). The key is
+    // reinterpreted as unsigned so negative keys give a valid index.
+    int hashFunction(long long key) const {
+        return (int)((unsigned long long)key % (unsigned long long)tableSize);
+    }
+
+    // Inserts a copy of `rec` at the head of its bucket. Duplicate keys
+    // are not detected.
+    void insert(const Record& rec) {
+        int idx = hashFunction(rec.key);
+        table[idx] = new Node{rec, table[idx]};
+    }
+
+    // Searches for `targetKey` and writes one line per comparison to `out`.
+    // Returns true when the key is found.
+    bool searchWithSteps(long long targetKey, ofstream& out) const {
+        int idx = hashFunction(targetKey);
+
+        out << "Searching for target: " << targetKey << "\n";
+        out << "Hash bucket index : " << idx << "\n";
+        out << "-------------------------------------------\n";
+
+        int compareCount = 0;
+        for (const Node* curr = table[idx]; curr != nullptr; curr = curr->next) {
+            compareCount++;
+            out << "Comparison " << compareCount
+                << ": comparing with " << curr->data.key
+                << "/" << curr->data.value;
+
+            if (curr->data.key == targetKey) {
+                out << " --> MATCH\n";
+                out << "-------------------------------------------\n";
+                out << targetKey << " = "
+                    << curr->data.key << "/" << curr->data.value << "\n";
+                return true;
+            }
+            out << " (no match)\n";
+        }
+
+        out << "-------------------------------------------\n";
+        out << "-1 != " << targetKey << "\n";
+        return false;
+    }
+};
+
+// Reads "key,value" rows from `filename`. Blank lines are skipped and a
+// trailing '\r' (Windows line ending) is removed. Problems are written to
+// `report` when it is non-null and are otherwise silent. Returns an empty
+// vector when the file cannot be opened.
+vector<Record> parseCSV(const string& filename, ostream* report = nullptr) {
+    vector<Record> records;
+    ifstream inFile(filename);
+
+    if (!inFile.is_open()) {
+        if (report) {
+            *report << "Error opening file: " << filename << "\n";
+        }
+        return records;
+    }
+
+    string line;
+    while (getline(inFile, line)) {
+        if (line.empty()) continue;
+        if (line.back() == '\r') {
+            line.pop_back();
+        }
+
+        stringstream ss(line);
+        string keyStr, valueStr;
+        if (!getline(ss, keyStr, ',') || !getline(ss, valueStr)) continue;
+
+        try {
+            records.push_back(Record{stoll(keyStr), valueStr});
+        } catch (const exception&) {
+            // stoll throws invalid_argument / out_of_range for bad keys.
+            if (report) {
+                *report << "Error parsing line: " << line << "\n";
+            }
+        }
+    }
+    return records;
+}
+
+// Returns the text between the last '_' and the last '.' of the file name
+// (e.g. "1000" for "data/dataset_1000.csv"), or "unknown" when the name does
+// not have that shape. Only the part after the last path separator is
+// inspected, so '_' or '.' in a directory name cannot affect the result.
+string extractDatasetSize(const string& filename) {
+    const size_t slashPos = filename.find_last_of("/\\");
+    const string base = (slashPos == string::npos) ? filename : filename.substr(slashPos + 1);
+
+    const size_t underPos = base.rfind('_');
+    const size_t dotPos = base.rfind('.');
+    if (underPos != string::npos && dotPos != string::npos && underPos < dotPos) {
+        return base.substr(underPos + 1, dotPos - underPos - 1);
+    }
+    return "unknown";
+}
+
+// Writes a banner for `targetKey`, runs the step-by-step search and appends
+// a FOUND / NOT FOUND summary line to `outFile`.
+void runSearch(const HashTable& ht,
+               long long targetKey,
+               ofstream& outFile) {
+    outFile << "\n--- Searching for target: " << targetKey << " ---\n";
+    if (ht.searchWithSteps(targetKey, outFile)) {
+        outFile << "Result: FOUND (" << targetKey << " = " << targetKey << ")\n";
+    } else {
+        outFile << "Result: NOT FOUND (" << targetKey << " != -1)\n";
+    }
+}
+
+int main(int argc, char* argv[]) {
+    if (argc < 2) {
+        cerr << "Usage: " << argv[0] << " <dataset_file.csv>" << endl;
+        return 1;
+    }
+
+    string datasetFile = argv[1];
+    string datasetSizeStr = extractDatasetSize(datasetFile);
+    string reportFilename = makeReportFilename(datasetSizeStr);
+    ofstream outFile(reportFilename);
+
+    if (!outFile.is_open()) {
+        cerr << "ERROR: Cannot create output file: " << reportFilename << "\n";
+        return 1;
+    }
+
+    outFile << "Reading dataset from: " << datasetFile << " ...\n";
+    cout << "Reading dataset from: " << datasetFile << endl;
+    vector<Record> records = parseCSV(datasetFile, &outFile);
+
+    if (records.empty()) {
+        outFile << "ERROR: No records loaded. Check the file path and format.\n";
+        cerr << "No valid records found in the dataset." << endl;
+        return 1;
+    }
+    outFile << "Loaded " << records.size() << " records.\n";
+
+    // At least 2n buckets, i.e. a load factor of at most 0.5.
+    int tableSize = choosePrimeTableSize((int)records.size() * 2);
+    outFile << "Building hash table with " << tableSize << " buckets ...\n";
+    HashTable ht(tableSize);
+
+    for (const Record& rec : records) {
+        ht.insert(rec);
+    }
+    outFile << "Hash table built successfully.\n";
+
+    // One key known to be present and one (-1) expected to be absent.
+    long long TARGET_FOUND = records.front().key;
+    long long TARGET_NOT_FOUND = -1;
+
+    runSearch(ht, TARGET_FOUND, outFile);
+    runSearch(ht, TARGET_NOT_FOUND, outFile);
+
+    outFile << "\nDone.\n";
+    cout << "Results written to file: " << reportFilename << "\n";
+
+    return 0;
+}
diff --git a/readme.md b/readme.md index 0adfeb0..568b04c 100644 --- a/readme.md +++ b/readme.md @@ -4,6 +4,8 @@ g++ -o dataset_generator dataset_generator.cpp g++ -o radix_sort radix_sort.cpp -lm g++ -o radix_sort_step radix_sort_step.cpp -lm +g++ hash_table_search_step.cpp -o hash_table_search_step +g++ hash_table_search.cpp -o hash_table_search ``` ## Generate test data @@ -31,3 +33,16 @@ Performs LSD radix sort on each file, outputs `radix_sorted_dataset_*.csv` with ``` Outputs step-by-step trace of radix sort on rows 1-7. Generates `dataset_1000_radix_sorted_step_1_7.txt` showing array state after each digit pass (d=10 to d=1). + + +## Hash Table Search Step + +```sh +./hash_table_search_step dataset_1000.csv +``` + +## Hash Table Search + +```sh +./hash_table_search dataset_1000.csv +``` \ No newline at end of file