diff --git a/bin/example1 b/bin/example1 new file mode 100755 index 0000000..1e1ca9b Binary files /dev/null and b/bin/example1 differ diff --git a/bin/example2 b/bin/example2 new file mode 100755 index 0000000..87f917f Binary files /dev/null and b/bin/example2 differ diff --git a/bin/example3 b/bin/example3 new file mode 100755 index 0000000..6e8eb4e Binary files /dev/null and b/bin/example3 differ diff --git a/bin/example4 b/bin/example4 new file mode 100755 index 0000000..e5969b2 Binary files /dev/null and b/bin/example4 differ diff --git a/example/best_buy.cpp b/example/best_buy.cpp new file mode 100644 index 0000000..4570f3c --- /dev/null +++ b/example/best_buy.cpp @@ -0,0 +1,100 @@ +#include +#include +#include +#include +#include +#include +#include +#include "../src/RecordLoader.h" +#include "../src/BitmapIterator.h" +#include "../src/BitmapConstructor.h" + +using namespace std; + +// Define a mutex for thread-safe appending to the output string +mutex counts_mutex; + +// Map to store the counts for each JSONPath query +unordered_map counts; + + +// The query function processes the JSON records according to the specified keys +void query(BitmapIterator* iter) { + if (iter->isObject()) { + // Extracting the "categoryPath" IDs as per the BB JSONPath Query + if (iter->moveToKey("categoryPath")) { + if (iter->down() && iter->isArray()) { + for (int idx = 1; idx <= 2; idx++) { // Adjusted to get the 2nd and 3rd elements + if (iter->moveToIndex(idx)) { + if (iter->down() && iter->isObject() && iter->moveToKey("id")) { + counts_mutex.lock(); + counts["BB"]++; // Increment the count for the "BB" query + counts_mutex.unlock(); + + } + iter->up(); + } + } + iter->up(); + } + } + } +} + + +// Function to process a subset of records +void process_records(RecordSet* record_set, int start, int end) { + for (int i = start; i < end; i++) { + Bitmap* bm = BitmapConstructor::construct((*record_set)[i], 1, 3); + BitmapIterator* iter = BitmapConstructor::getIterator(bm); + query(iter); + delete iter; + delete bm; + } +} + +int main() { + // Initialize counts map with all query IDs set to 0 + counts["BB"] = 0; + + char* file_path = "../dataset/bestbuy_small_records.json"; + RecordSet* record_set = RecordLoader::loadRecords(file_path); + if (record_set->size() == 0) { + cout << "Record loading failed." << endl; + return -1; + } + + int thread_num = std::thread::hardware_concurrency(); // Use as many threads as there are CPU cores + vector threads; + + // Start the timer + auto start_time = chrono::high_resolution_clock::now(); + + // Calculate the number of records each thread should process + int num_recs_per_thread = record_set->size() / thread_num; + for (int i = 0; i < thread_num; i++) { + int start = i * num_recs_per_thread; + int end = (i == thread_num - 1) ? record_set->size() : (i + 1) * num_recs_per_thread; + threads.emplace_back(process_records, record_set, start, end); + } + + // Wait for all threads to finish + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + + // Stop the timer + auto end_time = chrono::high_resolution_clock::now(); + chrono::duration elapsed_time = end_time - start_time; + + delete record_set; + + // Output the counts in the specified format + cout << "ID\tJSONPath Query\tNumber of Matches" << endl; + cout << "BB\t{$.categoryPath[1:3].id}\t" << counts["BB"] << endl; + cout << "Execution time: " << elapsed_time.count() << " ms" << endl; + + return 0; +} \ No newline at end of file diff --git a/example/example3.cpp b/example/example3.cpp index 51df432..c20a204 100644 --- a/example/example3.cpp +++ b/example/example3.cpp @@ -1,8 +1,19 @@ +#include +#include +#include +#include +#include +#include #include "../src/RecordLoader.h" #include "../src/BitmapIterator.h" #include "../src/BitmapConstructor.h" -// {$.user.id, $.retweet_count} +using namespace std; + +// Define a mutex for thread-safe appending to the output string +mutex output_mutex; + +// The query function processes the JSON records according to the specified keys string query(BitmapIterator* iter) { string output = ""; if (iter->isObject()) { @@ -31,38 +42,51 @@ string query(BitmapIterator* iter) { return output; } +// Function to process a subset of records +void process_records(RecordSet* record_set, int start, int end, string& output) { + for (int i = start; i < end; i++) { + Bitmap* bm = BitmapConstructor::construct((*record_set)[i], 1, 2); // Each thread uses 1 thread internally + BitmapIterator* iter = BitmapConstructor::getIterator(bm); + string local_output = query(iter); + delete iter; + delete bm; + + // Lock the mutex before appending to the shared output string + output_mutex.lock(); + output.append(local_output); + output_mutex.unlock(); + } +} + int main() { char* file_path = "../dataset/twitter_sample_small_records.json"; RecordSet* record_set = RecordLoader::loadRecords(file_path); if (record_set->size() == 0) { - cout<<"record loading fails."<size(); - Bitmap* bm = NULL; - for (int i = 0; i < num_recs; i++) { - bm = BitmapConstructor::construct((*record_set)[i], thread_num, level_num); - BitmapIterator* iter = BitmapConstructor::getIterator(bm); - output.append(query(iter)); - delete iter; + int thread_num = std::thread::hardware_concurrency(); // Use as many threads as there are CPU cores + std::vector threads; + + // Calculate the number of records each thread should process + int num_recs_per_thread = record_set->size() / thread_num; + for (int i = 0; i < thread_num; i++) { + int start = i * num_recs_per_thread; + int end = (i == thread_num - 1) ? record_set->size() : (i + 1) * num_recs_per_thread; + threads.emplace_back(process_records, record_set, start, end, ref(output)); + } + + // Wait for all threads to finish + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } } - delete bm; + delete record_set; - - cout<<"matches are: "< +#include +#include +#include +#include +#include #include "../src/RecordLoader.h" #include "../src/BitmapIterator.h" #include "../src/BitmapConstructor.h" -// $.categoryPath[1:3].id +using namespace std; + +// Define a mutex for thread-safe appending to the output string +mutex output_mutex; + +// The query function processes the JSON records according to the specified keys string query(BitmapIterator* iter) { string output = ""; if (iter->isObject() && iter->moveToKey("categoryPath")) { - if (iter->down() == false) return output; /* value of "categoryPath" */ + if (iter->down() == false) return output; // value of "categoryPath" if (iter->isArray()) { for (int idx = 1; idx <= 2; ++idx) { // 2nd and 3rd elements inside "categoryPath" array @@ -27,38 +38,51 @@ string query(BitmapIterator* iter) { return output; } +// Function to process a subset of records +void process_records(RecordSet* record_set, int start, int end, string& output) { + for (int i = start; i < end; i++) { + Bitmap* bm = BitmapConstructor::construct((*record_set)[i], 1, 3); // Each thread uses 1 thread internally + BitmapIterator* iter = BitmapConstructor::getIterator(bm); + string local_output = query(iter); + delete iter; + delete bm; + + // Lock the mutex before appending to the shared output string + output_mutex.lock(); + output.append(local_output); + output_mutex.unlock(); + } +} + int main() { char* file_path = "../dataset/bestbuy_sample_small_records.json"; RecordSet* record_set = RecordLoader::loadRecords(file_path); if (record_set->size() == 0) { - cout<<"record loading fails."< threads; - // fix the number of threads to 1 for small records scenario; parallel bitmap construction is TBD. - int thread_num = 1; - - /* set the number of levels of bitmaps to create, either based on the - * query or the JSON records. E.g., query $[*].user.id needs three levels - * (level 0, 1, 2), but the record may be of more than three levels - */ - int level_num = 3; + // Calculate the number of records each thread should process + int num_recs_per_thread = record_set->size() / thread_num; + for (int i = 0; i < thread_num; i++) { + int start = i * num_recs_per_thread; + int end = (i == thread_num - 1) ? record_set->size() : (i + 1) * num_recs_per_thread; + threads.emplace_back(process_records, record_set, start, end, ref(output)); + } - /* process the records one by one: for each one, first build bitmap, then perform - * the query with a bitmap iterator - */ - int num_recs = record_set->size(); - Bitmap* bm = NULL; - for (int i = 0; i < num_recs; i++) { - bm = BitmapConstructor::construct((*record_set)[i], thread_num, level_num); - BitmapIterator* iter = BitmapConstructor::getIterator(bm); - output.append(query(iter)); - delete iter; + // Wait for all threads to finish + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } } - delete bm; + delete record_set; - cout<<"matches are: "< +#include +#include +#include +#include +#include +#include +#include "../src/RecordLoader.h" +#include "../src/BitmapIterator.h" +#include "../src/BitmapConstructor.h" + +using namespace std; + +// Define a mutex for thread-safe appending to the output string +mutex counts_mutex; + +// Map to store the counts for each JSONPath query +unordered_map counts; + + +// The query function processes the JSON records according to the specified keys +void query(BitmapIterator* iter) { + if (iter->isObject()) { + + if (iter->moveToKey("user")) { + if (iter->down()) { + if (iter->moveToKey("lang")) { + counts_mutex.lock(); + counts["TT"]++; // Increment the count for the "TT" query for $.user.lang + counts_mutex.unlock(); + + } + iter->up(); + } + } + if (iter->moveToKey("lang")) { + counts_mutex.lock(); + counts["TT"]++; // Increment the count for the "TT" query for $.lang + counts_mutex.unlock(); + } + } +} + + +// Function to process a subset of records +void process_records(RecordSet* record_set, int start, int end) { + for (int i = start; i < end; i++) { + Bitmap* bm = BitmapConstructor::construct((*record_set)[i], 1, 3); + BitmapIterator* iter = BitmapConstructor::getIterator(bm); + query(iter); + delete iter; + delete bm; + } +} + +int main() { + // Initialize counts map with all query IDs set to 0 + counts["TT"] = 0; + + char* file_path = "../dataset/twitter_small_records.json"; + RecordSet* record_set = RecordLoader::loadRecords(file_path); + if (record_set->size() == 0) { + cout << "Record loading failed." << endl; + return -1; + } + + int thread_num = std::thread::hardware_concurrency(); // Use as many threads as there are CPU cores + vector threads; + + // Calculate the number of records each thread should process + int num_recs_per_thread = record_set->size() / thread_num; + for (int i = 0; i < thread_num; i++) { + int start = i * num_recs_per_thread; + int end = (i == thread_num - 1) ? record_set->size() : (i + 1) * num_recs_per_thread; + threads.emplace_back(process_records, record_set, start, end); + } + + // Start the timer + auto start_time = chrono::high_resolution_clock::now(); + + // Wait for all threads to finish + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + + // Stop the timer + auto end_time = chrono::high_resolution_clock::now(); + chrono::duration elapsed_time = end_time - start_time; + + // Output the counts in the specified format + cout << "ID\tJSONPath Query\tNumber of Matches" << endl; + cout << "TT\t{$.user.lang, $.lang}\t" << counts["TT"] << endl; + cout << "Execution time: " << elapsed_time.count() << " ms" << endl; + + delete record_set; + + return 0; +} \ No newline at end of file diff --git a/makefile b/makefile index e549178..45bfc59 100644 --- a/makefile +++ b/makefile @@ -1,34 +1,23 @@ DIR = bin -EXEC1 = $(DIR)/example1 -EXEC2 = $(DIR)/example2 -EXEC3 = $(DIR)/example3 -EXEC4 = $(DIR)/example4 -TARGET = $(EXEC1) ${EXEC2} ${EXEC3} ${EXEC4} +\ +EXEC4 = $(DIR)/best_buy +EXEC5 = $(DIR)/twitter +TARGET = ${EXEC4} ${EXEC5} all: $(TARGET) CC = g++ CC_FLAGS = -O3 -std=c++11 -mavx -mavx2 -msse -msse2 -msse4 -msse4.2 -mpclmul POST_FLAGS = -lpthread -mcmodel=medium -static-libstdc++ -SOURCE1 = src/*.cpp example/example1.cpp -$(EXEC1): $(SOURCE1) - mkdir -p $(DIR) - $(CC) $(CC_FLAGS) -o $(EXEC1) $(SOURCE1) $(POST_FLAGS) - -SOURCE2 = src/*.cpp example/example2.cpp -$(EXEC2): $(SOURCE2) - mkdir -p $(DIR) - $(CC) $(CC_FLAGS) -o $(EXEC2) $(SOURCE2) $(POST_FLAGS) -SOURCE3 = src/*.cpp example/example3.cpp -$(EXEC3): $(SOURCE3) - mkdir -p $(DIR) - $(CC) $(CC_FLAGS) -o $(EXEC3) $(SOURCE3) $(POST_FLAGS) - -SOURCE4 = src/*.cpp example/example4.cpp +SOURCE4 = src/*.cpp example/best_buy.cpp $(EXEC4): $(SOURCE4) mkdir -p $(DIR) $(CC) $(CC_FLAGS) -o $(EXEC4) $(SOURCE4) $(POST_FLAGS) +SOURCE5 = src/*.cpp example/twitter.cpp +$(EXEC5): $(SOURCE5) + mkdir -p $(DIR) + $(CC) $(CC_FLAGS) -o $(EXEC5) $(SOURCE5) $(POST_FLAGS) clean: -$(RM) $(TARGET) diff --git a/src/LocalBitmap.cpp b/src/LocalBitmap.cpp index 227738c..780f1d5 100644 --- a/src/LocalBitmap.cpp +++ b/src/LocalBitmap.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include using namespace std; diff --git a/src/SerialBitmap.cpp b/src/SerialBitmap.cpp index cf0206b..2a36530 100644 --- a/src/SerialBitmap.cpp +++ b/src/SerialBitmap.cpp @@ -19,6 +19,7 @@ #include #include #include +#include using namespace std; SerialBitmap::SerialBitmap() {