diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..eaf91e2
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/Graph_Edit_Distance.iml b/.idea/Graph_Edit_Distance.iml
new file mode 100644
index 0000000..d9e6024
--- /dev/null
+++ b/.idea/Graph_Edit_Distance.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..e1c2707
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,27 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..7b5bf37
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..d06d743
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..9661ac7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index 990f2dc..693ec34 100644
--- a/main.cpp
+++ b/main.cpp
@@ -4,6 +4,10 @@
#include "Timer.h"
#include "popl.hpp"
+#include
+#include
+#include
+
using namespace std;
using namespace popl;
@@ -13,7 +17,7 @@ void print_usage() {
}
ui label2int(const char *str, map &M) {
- if(M.find(string(str)) == M.end()) M[string(str)] = M.size();
+ if (M.find(string(str)) == M.end()) M[string(str)] = M.size();
return M[string(str)];
}
@@ -22,7 +26,7 @@ ui load_db(const char *file_name, vector &graphs, map &vM, ma
const ui MAX_LINE = 1024;
char line[MAX_LINE];
- if(fgets(line, MAX_LINE, fin) == NULL) {
+ if (fgets(line, MAX_LINE, fin) == nullptr) {
fclose(fin);
return 0;
}
@@ -34,34 +38,35 @@ ui load_db(const char *file_name, vector &graphs, map &vM, ma
string id(buf);
line[0] = 'x';
- vector > vertices;
- vector,ui> > edges;
- while(fgets(line, MAX_LINE, fin) != NULL&&line[0] != 't') {
- if(line[0] == 'v') {
+ vector> vertices;
+ vector,ui>> edges;
+ while(fgets(line, MAX_LINE, fin) != nullptr && line[0] != 't') {
+ if (line[0] == 'v') {
int a;
sscanf(line+2, "%d%s", &a, buf);
- //buf[0] = '1';
vertices.pb(mp(a, label2int(buf, vM)));
}
- else if(line[0] == 'e') {
+ else if (line[0] == 'e') {
int a, b;
sscanf(line+2, "%d%d%s", &a, &b, buf);
edges.pb(mp(mp(a,b), label2int(buf, eM)));
edges.pb(mp(mp(b,a), label2int(buf, eM)));
}
- else printf("!!! Unrecongnized first letter in a line when loading DB!\n");
+ else printf("!!! Unrecognized first letter in a line when loading DB !!!\n");
line[0] = 'x';
}
+ int v_size = static_cast(vertices.size());
+ int e_size = static_cast(edges.size());
sort(vertices.begin(), vertices.end());
- for(ui i = 0;i < vertices.size();i ++) assert(vertices[i].first == i);
- if(vertices.size() > max_n) max_n = vertices.size();
+ for (ui i = 0; i < v_size; i++) assert(vertices[i].first == i);
+ if (v_size > max_n) max_n = v_size;
sort(edges.begin(), edges.end());
- for(ui i = 0;i < edges.size();i ++) {
- assert(edges[i].first.first >= 0&&edges[i].first.first < vertices.size());
- assert(edges[i].first.second >= 0&&edges[i].first.second < vertices.size());
- if(i > 0) assert(edges[i].first != edges[i-1].first);
+ for (ui i = 0; i < e_size; i++) {
+ assert(edges[i].first.first >= 0 && edges[i].first.first < v_size);
+ assert(edges[i].first.second >= 0 && edges[i].first.second < v_size);
+ if (i > 0) assert(edges[i].first != edges[i-1].first);
assert(edges[i].second < eM.size());
}
@@ -74,21 +79,19 @@ ui load_db(const char *file_name, vector &graphs, map &vM, ma
void generate_queries(const vector &db, vector &queries, ui q_n) {
assert(!db.empty());
- srand(time(NULL));
- for(ui i = 0;i < q_n;i ++) queries.pb(rand()%db.size());
+ srand(time(nullptr));
+ for (ui i = 0; i < q_n; i ++) queries.pb(rand() % db.size());
}
void write_queries(const char *file_name, const vector &db, const vector &queries, const map &vM, const map &eM, bool bss) {
vector vlabels(vM.size());
vector elabels(eM.size());
- for(pair p: vM) vlabels[p.second] = p.first;
- for(pair p: eM) elabels[p.second] = p.first;
+ for (pair p: vM) vlabels[p.second] = p.first;
+ for (pair p: eM) elabels[p.second] = p.first;
FILE *fout = Utility::open_file(file_name, "w");
-
- for(ui i = 0;i < queries.size();i ++) db[queries[i]]->write_graph(fout, vlabels, elabels, bss);
-
+ for (ui i = 0; i < queries.size(); i++) db[queries[i]]->write_graph(fout, vlabels, elabels, bss);
fclose(fout);
}
@@ -102,7 +105,7 @@ int main(int argc, char *argv[]) {
print_usage();
- string mode, paradigm, lower_bound;
+ string mode, paradigm, lower_bound, save_filepath;
int threshold = -1;
bool print_ged = false;
@@ -110,16 +113,26 @@ int main(int argc, char *argv[]) {
auto help_option = op.add("h", "help", "\'produce help message\'");
auto database_option = op.add>("d", "database", "\'database file name\'");
auto query_option = op.add>("q", "query", "\'query file name\'");
- auto mode_option = op.add>("m", "mode", "\'running mode\' (search | pair)", "search", &mode);
+ auto save_option = op.add>("s", "save", "\'pairwise distance filename\'", "datasets/pairwise_ged.csv", &save_filepath); // help text quoted like the sibling options
+ auto mode_option = op.add>("m", "mode", "\'running mode\' (search | pair | pairwise)", "search", &mode);
auto paradigm_option = op.add>("p", "paradigm", "\'search paradigm\' (astar | dfs)", "astar", ¶digm);
auto lower_bound_option = op.add>("l", "lower_bound", "\'lower bound method\' (LSa | BMao | BMa)", "BMao", &lower_bound);
auto threshold_option = op.add>("t", "threshold", "\'threshold for GED verification; if not provided, then GED computation", -1, &threshold);
op.add("g", "ged", "\'print_ged\'", &print_ged);
-
op.parse(argc, argv);
- if(help_option->is_set()||argc == 1) cout << op << endl;
- if(!database_option->is_set()||!query_option->is_set()) {
+ if (help_option->is_set()||argc == 1) cout << op << endl;
+ if (mode == "pairwise") {
+ if (!database_option->is_set()) {
+ printf("!!! Database file name not provided! Exit !!!\n");
+ return 0;
+ }
+ // Test -q BEFORE aliasing: once query_option points at database_option, is_set() is always true here.
+ if (query_option->is_set()) {
+ printf("Warning: query file ignored. Pairwise distances are calculated for the compounds from database file\n");
+ }
+ query_option = database_option;
+ } else if (!database_option->is_set() || !query_option->is_set()) {
printf("!!! Database file name or query file name is not provided! Exit !!!\n");
return 0;
}
@@ -130,9 +143,8 @@ int main(int argc, char *argv[]) {
map vM, eM;
ui max_db_n = load_db(database.c_str(), db, vM, eM);
- printf("*** %s %s %s %d: %s %s", mode.c_str(), paradigm.c_str(), lower_bound.c_str(), threshold, database.c_str(), query.c_str());
#ifdef _EXPAND_ALL_
- //printf(" Expand_all");
+ // printf(" Expand_all");
#else
printf(" Expand_one");
#endif
@@ -148,7 +160,7 @@ int main(int argc, char *argv[]) {
ui max_query_n = load_db(query.c_str(), queries, vM, eM);
ui verify_upper_bound;
- if(threshold < 0) verify_upper_bound = INF;
+ if (threshold < 0) verify_upper_bound = INF;
else verify_upper_bound = (ui)threshold;
long long search_space = 0;
@@ -161,170 +173,195 @@ int main(int argc, char *argv[]) {
memset(vlabel_cnt, 0, sizeof(int)*vM.size());
memset(elabel_cnt, 0, sizeof(int)*eM.size());
- if(max_query_n > max_db_n) max_db_n = max_query_n;
+ if (max_query_n > max_db_n) max_db_n = max_query_n;
int *degree_q = new int[max_db_n];
int *degree_g = new int[max_db_n];
int *tmp = new int[max_db_n];
- if(strcmp(mode.c_str(), "pair") != 0&&strcmp(mode.c_str(), "search") != 0) {
- printf("!!! Wrong mode (pair | search) selection!\n");
+ if (mode != "pair" && mode != "search" && mode != "pairwise") {
+ printf("!!! Wrong mode (pair | search | pairwise) selection !!!\n"); // must list every mode the check above accepts
return 0;
}
- if(strcmp(paradigm.c_str(), "astar") != 0&&strcmp(paradigm.c_str(), "dfs") != 0) {
- printf("!!! Wrong algorithm (astar | dfs) selection!\n");
+ if (paradigm != "astar" && paradigm != "dfs") {
+ printf("!!! Wrong algorithm (astar | dfs) selection !!!\n");
return 0;
}
Timer t;
- if(strcmp(mode.c_str(), "pair") == 0) {
+ if (mode == "pair") {
long long time1 = 0, cnt1 = 0, ss1 = 0;
long long time2 = 0, cnt2 = 0, ss2 = 0;
- if(queries.size() != db.size()) {
+ int db_size = static_cast(db.size());
+ int q_size = static_cast(queries.size());
+ if (q_size != db_size) {
printf("Query size != db size in the pair mode\n");
exit(0);
}
- if(print_ged) printf("*** GEDs ***\n");
- ui min_ged = 1000000000, max_ged = 0;
- for(ui i = 0;i < queries.size();i ++) {
- ui current = i*100/queries.size();
- if(current != pre) {
+ if (print_ged) printf("*** GEDs ***\n");
+ ui min_ged = INT_MAX, max_ged = 0;
+ for (ui i = 0; i < q_size; i++) {
+ ui current = i*100/q_size;
+ if (current != pre) {
fprintf(stderr, "\r[%d%% finished]", current);
fflush(stderr);
- //cout<<"\r["<ged_lower_bound_filter(db[i], verify_upper_bound, vlabel_cnt, elabel_cnt, degree_q, degree_g, tmp);
- if(lb > verify_upper_bound) continue;
+ if (lb > verify_upper_bound) continue;
- ++ candidates_cnt;
+ ++candidates_cnt;
Timer t1;
Application *app = new Application(verify_upper_bound, lower_bound.c_str());
app->init(db[i], queries[i]);
int res = INF;
- if(strcmp(paradigm.c_str(), "astar") == 0) res = app->AStar();
- else res = app->DFS(NULL);
+ if (paradigm == "astar") res = app->AStar();
+ else res = app->DFS(nullptr);
#ifndef NDEBUG
assert(res == app->compute_ged_of_BX());
#endif
search_space += app->get_search_space();
- if(res <= verify_upper_bound) ++ results_cnt;
+ if (res <= verify_upper_bound)++results_cnt;
else res = -1;
- if(print_ged) {
+ if (print_ged) {
printf("%d\n", res);
- if(res > max_ged) max_ged = res;
- if(res < min_ged) min_ged = res;
+ if (res > max_ged) max_ged = res;
+ if (res < min_ged) min_ged = res;
}
-
- if(res == -1) {
+ if (res == -1) {
time2 += t1.elapsed();
ss2 += app->get_search_space();
- ++ cnt2;
- }
- else {
+ ++cnt2;
+ } else {
time1 += t1.elapsed();
ss1 += app->get_search_space();
- ++ cnt1;
+ ++cnt1;
}
-
- //printf("%u %u\n", db[i]->n, queries[i]->n);
- //if(db[i]->id.compare(queries[i]->id) < 0) printf("\t(pair_%u %s %s) GED: %d, Time: %s, Search space: %lld\n", i, db[i]->id.c_str(), queries[i]->id.c_str(), res, Utility::integer_to_string(t1.elapsed()).c_str(), app->get_search_space());
- //else printf("\t(pair_%u %s %s) GED: %d, Time: %s, Search space: %lld\n", i, queries[i]->id.c_str(), db[i]->id.c_str(), res, Utility::integer_to_string(t1.elapsed()).c_str(), app->get_search_space());
- //fflush(stdout);
-
delete app;
}
fprintf(stderr, "\n");
- if(print_ged) {
+ if (print_ged) {
printf("*** GEDs ***\n");
printf("min_ged: %u, max_ged: %u\n", min_ged, max_ged);
}
- //printf("%d %d\n", cnt1, cnt2);
- if(cnt1 + cnt2 != 0) printf("total average time: %s, total average_ss: %lld\n", Utility::integer_to_string((time1+time2)/(cnt1+cnt2)).c_str(), (ss1+ss2)/(cnt1+cnt2));
- if(verify_upper_bound < INF) {
+ if (cnt1 + cnt2 != 0) printf("total average time: %s, total average_ss: %lld\n", Utility::integer_to_string((time1+time2)/(cnt1+cnt2)).c_str(), (ss1+ss2)/(cnt1+cnt2));
+ if (verify_upper_bound < INF) {
printf("Dissimilar (%lld pairs) average time: ", cnt2);
- if(cnt2 == 0) printf("0, ");
+ if (cnt2 == 0) printf("0, ");
else printf("%s, ", Utility::integer_to_string(time2/cnt2).c_str());
printf("Dissimilar average space: ");
- if(cnt2 == 0) printf("0\n");
+ if (cnt2 == 0) printf("0\n");
else printf("%lld\n", ss2/cnt2);
printf("Similar (%lld pairs) average time: ", cnt1);
- if(cnt1 == 0) printf("0, ");
+ if (cnt1 == 0) printf("0, ");
else printf("%s, ", Utility::integer_to_string(time1/cnt1).c_str());
printf("Similar average space: ");
- if(cnt1 == 0) printf("0\n");
+ if (cnt1 == 0) printf("0\n");
else printf("%lld\n", ss1/cnt1);
}
- }
- else {
+ printf("Total time: %s (microseconds), total search space: %lld\n #candidates: %lld, #matches: %lld\n", Utility::integer_to_string(t.elapsed()).c_str(), search_space, candidates_cnt, results_cnt);
+ } else if (mode == "pairwise") {
long long total_res = 0;
- if(print_ged) printf("*** GEDs ***\n");
- ui min_ged = 1000000000, max_ged = 0;
- for(ui i = 0;i < queries.size();i ++) {
- for(ui j = 0; j < db.size();j ++) {
- ui current = (i*(long long)(db.size())+j+1)*100/(queries.size()*(long long)(db.size()));
- if(current != pre) {
+ int db_size = static_cast(db.size());
+ vector> ged_matrix(db_size, vector(db_size, 0));
+ ui min_ged = INT_MAX, max_ged = 0;
+
+ for (ui i = 0; i < db_size; i++) {
+ for (ui j = i+1; j < db_size; j++) {
+ Application *app = new Application(verify_upper_bound, lower_bound.c_str());
+ app->init(db[i], db[j]);
+ int res = INF;
+ if (paradigm == "astar") res = app->AStar();
+ else res = app->DFS(nullptr);
+ ged_matrix[i][j] = ged_matrix[j][i] = res;
+ if (res > max_ged) max_ged = res;
+ if (res < min_ged) min_ged = res;
+ total_res += res;
+
+ delete app;
+ }
+ }
+ std::ofstream out(save_filepath);
+ for (auto& row : ged_matrix) {
+ for (int k = 0; k < db_size-1; k++) {
+ out << row[k] <<',';
+ }
+ out << row[db_size-1] << '\n';
+ }
+ printf("*** GEDs ***\n");
+ printf("Min ged: %u, max ged: %u avg ged: %.3lf\n", min_ged, max_ged, db_size > 1 ? double(total_res) / (db_size*(long long)(db_size-1)/2) : 0.0); // fewer than 2 graphs => no pairs, avoid 0/0
+ printf("Total time: %s (microseconds)\n", Utility::integer_to_string(t.elapsed()).c_str());
+ printf("Results saved in %s\n", save_filepath.c_str());
+ } else {
+ long long total_res = 0;
+ int db_size = static_cast(db.size());
+ int q_size = static_cast(queries.size());
+
+ if (print_ged) printf("*** GEDs ***\n");
+ ui min_ged = INT_MAX, max_ged = 0;
+ for (ui i = 0; i < q_size; i++) {
+ for (ui j = 0; j < db_size; j++) {
+ ui current = (i*(long long)(db_size)+j+1)*100/(q_size*(long long)(db_size));
+ if (current != pre) {
fprintf(stderr, "\r[%d%% finished]", current);
fflush(stderr);
- //cout<<"\r["<ged_lower_bound_filter(db[j], verify_upper_bound, vlabel_cnt, elabel_cnt, degree_q, degree_g, tmp);
- if(lb > verify_upper_bound) continue;
+ if (lb > verify_upper_bound) continue;
- ++ candidates_cnt;
+ ++candidates_cnt;
Application *app = new Application(verify_upper_bound, lower_bound.c_str());
- //app->init(db_v[i], db_e[i], query_v[i], query_e[i]);
+ // app->init(db_v[i], db_e[i], query_v[i], query_e[i]);
app->init(db[j], queries[i]);
int res = INF;
- if(strcmp(paradigm.c_str(), "astar") == 0) res = app->AStar();
- else res = app->DFS(NULL);
+ if (paradigm == "astar") res = app->AStar();
+ else res = app->DFS(nullptr);
#ifndef NDEBUG
assert(res == app->compute_ged_of_BX());
#endif
- if(print_ged) {
- if(j) printf(" ");
+ if (print_ged) {
+ if (j) printf(" ");
printf("%u", res);
- if(res > max_ged) max_ged = res;
- if(res < min_ged) min_ged = res;
+ if (res > max_ged) max_ged = res;
+ if (res < min_ged) min_ged = res;
}
total_res += res;
- //printf("pair %lu (%s, %s): %d\n", i*db.size()+j, queries[i]->id.c_str(), db[j]->id.c_str(), res);
+ // printf("pair %lu (%s, %s): %d\n", i*db.size()+j, queries[i]->id.c_str(), db[j]->id.c_str(), res);
search_space += app->get_search_space();
- if(res <= verify_upper_bound) ++ results_cnt;
+ if (res <= verify_upper_bound) ++results_cnt;
delete app;
}
- if(print_ged) printf("\n");
+ if (print_ged) printf("\n");
}
fprintf(stderr, "\n");
- if(print_ged) {
+ if (print_ged) {
printf("*** GEDs ***\n");
- printf("min_ged: %u, max_ged: %u\n", min_ged, max_ged);
+ printf("Min ged: %u, max ged: %u avg ged: %.3lf\n", min_ged, max_ged, double(total_res) / (q_size*db_size));
}
- //printf("Average GED: %.3lf\n", double(total_res)/(queries.size()*db.size()));
+ printf("Total time: %s (microseconds), total search space: %lld\n #candidates: %lld, #matches: %lld\n", Utility::integer_to_string(t.elapsed()).c_str(), search_space, candidates_cnt, results_cnt);
}
- printf("Total time: %s (microseconds), total search space: %lld\n #candidates: %lld, #matches: %lld\n", Utility::integer_to_string(t.elapsed()).c_str(), search_space, candidates_cnt, results_cnt);
- delete[] vlabel_cnt; vlabel_cnt = NULL;
- delete[] elabel_cnt; elabel_cnt = NULL;
- delete[] degree_q; degree_q = NULL;
- delete[] degree_g; degree_g = NULL;
- delete[] tmp; tmp = NULL;
+ delete[] vlabel_cnt; vlabel_cnt = nullptr;
+ delete[] elabel_cnt; elabel_cnt = nullptr;
+ delete[] degree_q; degree_q = nullptr;
+ delete[] degree_g; degree_g = nullptr;
+ delete[] tmp; tmp = nullptr;
- for(ui i = 0;i < db.size();i ++) {
+ for (ui i = 0; i < db.size(); i++) {
delete db[i];
db[i] = nullptr;
}
- for(ui i = 0;i < queries.size();i ++) {
+ for (ui i = 0; i < queries.size(); i++) {
delete queries[i];
queries[i] = nullptr;
}