From 3d96d5ebc3e39ba04cfc4adb0fd435fc05c3d63d Mon Sep 17 00:00:00 2001
From: PDBdecipherer
Date: Wed, 24 Feb 2016 15:16:43 +0800
Subject: [PATCH 1/5] Update clstmocr.cc

---
 clstmocr.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clstmocr.cc b/clstmocr.cc
index 131759d..a4c43c6 100644
--- a/clstmocr.cc
+++ b/clstmocr.cc
@@ -46,6 +46,10 @@ int main1(int argc, char **argv) {
   string load_name = getsenv("load", "");
   if (load_name == "") THROW("must give load= parameter");
   CLSTMOCR clstm;
+  // open options for input text-line dimension,
+  // as well as normalization style.
+  clstm.target_height = int(getrenv("target_height", 45));
+  clstm.dewarp = getsenv("dewarp", "none");
   clstm.load(load_name);
 
   bool conf = getienv("conf", 0);

From b5bf6f14593b6567fe06eb322c31107113eeebff Mon Sep 17 00:00:00 2001
From: PDBdecipherer
Date: Wed, 24 Feb 2016 15:26:01 +0800
Subject: [PATCH 2/5] Update clstmocrtrain.cc

---
 clstmocrtrain.cc | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/clstmocrtrain.cc b/clstmocrtrain.cc
index eb31249..0f5bdaf 100644
--- a/clstmocrtrain.cc
+++ b/clstmocrtrain.cc
@@ -111,7 +111,9 @@ int main1(int argc, char **argv) {
     trainingset.getCodec(codec);
     print("got", codec.size(), "classes");
-    clstm.target_height = int(getrenv("target_height", 48));
+    clstm.target_height = int(getrenv("target_height", 45));
+    // Add option for normalization style
+    clstm.dewarp = getsenv("dewarp", "none");
     clstm.createBidi(codec.codec, getienv("nhidden", 100));
     clstm.setLearningRate(getdenv("lrate", 1e-4), getdenv("momentum", 0.9));
   }
 
@@ -134,13 +136,18 @@ int main1(int argc, char **argv) {
   save_trigger.enable(save_name != "").skip0();
   Trigger report_trigger(getienv("report_every", 100), ntrain, start);
   Trigger display_trigger(getienv("display_every", 0), ntrain, start);
-
+
+  // Add log for training error evolution.
+  double train_errors = 0.0;
+  double train_count = 0.0;
   for (int trial = start; trial < ntrain; trial++) {
     int sample = lrand48() % trainingset.size();
     Tensor2 raw;
     wstring gt;
     trainingset.readSample(raw, gt, sample);
     wstring pred = clstm.train(raw(), gt);
+    train_count += gt.size();
+    train_errors += lenvenshtein(pred, gt);
 
     if (report_trigger(trial)) {
       print(trial);
@@ -167,6 +174,14 @@ int main1(int argc, char **argv) {
       double errors = tse.first;
       double count = tse.second;
       test_error = errors / count;
+      double train_error;
+      if (train_errors > 0)
+        train_error = train_count / train_errors;
+      else
+        train_error = 9999.0;
+      print("Train ERROR: ", train_error);
+      train_count = 0.0;
+      train_errors = 0.0;
       print("ERROR", trial, test_error, " ", errors, count);
       if (test_error < best_error) {
         best_error = test_error;

From f7966821f09d2ac2dd35cbcb203ef3f25642a42a Mon Sep 17 00:00:00 2001
From: PDBdecipherer
Date: Wed, 24 Feb 2016 15:32:33 +0800
Subject: [PATCH 3/5] Update clstmhl.h

Support option for text-line normalization style
---
 clstmhl.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clstmhl.h b/clstmhl.h
index 1df36b0..923c0b8 100644
--- a/clstmhl.h
+++ b/clstmhl.h
@@ -146,8 +146,9 @@ struct CLSTMText {
 struct CLSTMOCR {
   shared_ptr<INormalizer> normalizer;
   Network net;
-  int target_height = 48;
+  int target_height;// = 48; // to avoid unwanted values.
   int nclasses = -1;
+  string dewarp; // Option for text-line normalization
   Sequence aligned, targets;
   Tensor2 image;
   void setLearningRate(float lr, float mom) { net->setLearningRate(lr, mom); }
@@ -161,7 +162,7 @@ struct CLSTMOCR {
       return false;
     }
     nclasses = net->codec.size();
-    normalizer.reset(make_CenterNormalizer());
+    normalizer.reset(make_Normalizer(dewarp));
     normalizer->target_height = target_height;
     return true;
   }
@@ -194,7 +195,7 @@ struct CLSTMOCR {
                 {"nhidden", nhidden}});
     net->initialize();
     net->codec.set(codec);
-    normalizer.reset(make_CenterNormalizer());
+    normalizer.reset(make_Normalizer(dewarp));
     normalizer->target_height = target_height;
   }
   std::wstring fwdbwd(TensorMap2 raw, const std::wstring &target) {

From de287ada1c1bda9052b871adc745eda1dc0a5c05 Mon Sep 17 00:00:00 2001
From: PDBdecipherer
Date: Wed, 24 Feb 2016 15:33:26 +0800
Subject: [PATCH 4/5] Update extras.h

---
 extras.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extras.h b/extras.h
index eaa7533..3ba093b 100644
--- a/extras.h
+++ b/extras.h
@@ -31,7 +31,7 @@ using std::min;
 
 // text line normalization
 struct INormalizer {
-  int target_height = 48;
+  int target_height; // = 48;
   float smooth2d = 1.0;
   float smooth1d = 0.3;
   float range = 4.0;

From 7be0e5c33de252321dcab51ff54316e111121133 Mon Sep 17 00:00:00 2001
From: crazylyf
Date: Thu, 17 Mar 2016 14:57:18 +0800
Subject: [PATCH 5/5] clstm update

---
 clstmocr.cc      |  4 +---
 clstmocrtrain.cc | 39 ++++++++++++++++++++++++++++++---------
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/clstmocr.cc b/clstmocr.cc
index a4c43c6..414191c 100644
--- a/clstmocr.cc
+++ b/clstmocr.cc
@@ -46,12 +46,10 @@ int main1(int argc, char **argv) {
   string load_name = getsenv("load", "");
   if (load_name == "") THROW("must give load= parameter");
   CLSTMOCR clstm;
-  // open options for input text-line dimension,
-  // as well as normalization style.
   clstm.target_height = int(getrenv("target_height", 45));
   clstm.dewarp = getsenv("dewarp", "none");
   clstm.load(load_name);
-
+
   bool conf = getienv("conf", 0);
   string output = getsenv("output", "text");
   bool save_text = getienv("save_text", 1);

diff --git a/clstmocrtrain.cc b/clstmocrtrain.cc
index 0f5bdaf..8cda1b6 100644
--- a/clstmocrtrain.cc
+++ b/clstmocrtrain.cc
@@ -65,6 +65,19 @@ struct Dataset {
     for (auto s : fnames) gtnames.push_back(basename(s) + ".gt.txt");
     codec.build(gtnames, charsep);
   }
+  void getCodec(Codec &codec, vector<string> file_lists) {
+    // get codec from several files, including training files, validation files,
+    // and perhaps testing files, in order to avoid unrecognized codecs
+    vector<string> gts;
+    for (int i=0; i<file_lists.size(); i++) {
+      vector<string> temp_names;
+      read_lines(temp_names, file_lists[i]);
+      for (auto s : temp_names) gts.push_back(basename(s) + ".gt.txt");
+    }
+    // build the codecs
+    codec.build(gts, charsep);
+  }
+
   void readSample(Tensor2 &raw, wstring &gt, int index) {
     string fname = fnames[index];
     string base = basename(fname);
@@ -92,12 +105,19 @@ int main1(int argc, char **argv) {
   int ntrain = getienv("ntrain", 10000000);
   string save_name = getsenv("save_name", "_ocr");
   int report_time = getienv("report_time", 0);
+  // vector storing the training and testing files
+  vector<string> file_lists;
   if (argc < 2 || argc > 3) THROW("... training [testing]");
   Dataset trainingset(argv[1]);
+  file_lists.push_back(argv[1]);
   assert(trainingset.size() > 0);
   Dataset testset;
-  if (argc > 2) testset.readFileList(argv[2]);
+  if (argc > 2) {
+    testset.readFileList(argv[2]);
+    file_lists.push_back(argv[2]);
+  }
+
   print("got", trainingset.size(), "files,", testset.size(), "tests");
 
   string load_name = getsenv("load", "");
@@ -108,15 +128,16 @@ int main1(int argc, char **argv) {
     clstm.load(load_name);
   } else {
     Codec codec;
-    trainingset.getCodec(codec);
+    //trainingset.getCodec(codec);
+    trainingset.getCodec(codec, file_lists); // use all ground truth files
     print("got", codec.size(), "classes");
     clstm.target_height = int(getrenv("target_height", 45));
-    // Add option for normalization style
    clstm.dewarp = getsenv("dewarp", "none");
     clstm.createBidi(codec.codec, getienv("nhidden", 100));
     clstm.setLearningRate(getdenv("lrate", 1e-4), getdenv("momentum", 0.9));
   }
+  file_lists.clear(); // clear the file_lists vector
   network_info(clstm.net);
 
   double test_error = 9999.0;
@@ -136,8 +157,7 @@ int main1(int argc, char **argv) {
   save_trigger.enable(save_name != "").skip0();
   Trigger report_trigger(getienv("report_every", 100), ntrain, start);
   Trigger display_trigger(getienv("display_every", 0), ntrain, start);
-
-  // Add log for training error evolution.
+
   double train_errors = 0.0;
   double train_count = 0.0;
   for (int trial = start; trial < ntrain; trial++) {
@@ -147,7 +167,7 @@ int main1(int argc, char **argv) {
     trainingset.readSample(raw, gt, sample);
     wstring pred = clstm.train(raw(), gt);
     train_count += gt.size();
-    train_errors += lenvenshtein(pred, gt);
+    train_errors += levenshtein(pred, gt);
 
     if (report_trigger(trial)) {
       print(trial);
@@ -174,15 +194,16 @@ int main1(int argc, char **argv) {
       double errors = tse.first;
      double count = tse.second;
       test_error = errors / count;
+      print("ERROR", trial, test_error, " ", errors, count);
       double train_error;
       if (train_errors > 0)
-        train_error = train_count / train_errors;
+	train_error = train_errors / train_count;
       else
-        train_error = 9999.0;
+	train_error = 9999.0;
       print("Train ERROR: ", train_error);
       train_count = 0.0;
       train_errors = 0.0;
-      print("ERROR", trial, test_error, " ", errors, count);
+
       if (test_error < best_error) {
         best_error = test_error;
         string fname = save_name + ".clstm";