// Source: CPE462-Project/project.cpp (main branch), TrentR2786/CPE462-Project on GitHub
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

#include <algorithm>
#include <cctype>
#include <iostream>
#include <memory>
#include <opencv2/opencv.hpp>
#include <string>
#include <vector>

using namespace std;
using namespace cv;
using namespace tesseract;

// Canonicalizes a word so that two words can be compared while ignoring
// letter case and punctuation.
//
// Every character for which std::isalnum() is false is removed, and the
// remaining characters are converted to lowercase with std::tolower().
// Both functions follow the current C locale.
//
// word: the text to canonicalize (taken by value and modified in place).
// Returns the lowercase, alphanumeric-only version of `word`; the result is
// empty when `word` contains no alphanumeric characters.
std::string normalize(std::string word) {
  // The <cctype> functions have undefined behavior for negative arguments,
  // so each char is widened through unsigned char first. This matters for
  // bytes >= 0x80 (e.g. UTF-8 text) on platforms where char is signed.
  const auto is_not_alnum = [](unsigned char c) { return std::isalnum(c) == 0; };

  // Erase-remove drops all unwanted characters in one linear pass and never
  // touches an invalidated iterator.
  word.erase(std::remove_if(word.begin(), word.end(), is_not_alnum), word.end());

  std::transform(word.begin(), word.end(), word.begin(),
                 [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

  return word;
}

int main(int argc, char* argv[]) {
  // Check for correct number of arguments
  if (argc != 4) {
    cout << "Usage: " << argv[0] << " <input filename> <list of censored words (in one quote, words separated by spaces)> <output filename>";
    return -1;
  }

  // Read image
  Mat image = imread(argv[1]);
  if (image.empty()) {
    cout << "Invalid or missing image." << endl;
    return -1;
  }

  // Convert image to grayscale
  Mat image_gray;
  cvtColor(image, image_gray, COLOR_BGR2GRAY);

  // Rescale image for better legibility
  Mat image_scaled;
  resize(image_gray, image_scaled, Size(0,0), 1.2, 1.2, INTER_CUBIC);

  // Gaussian blur the image to remove noise
  Mat image_blur;
  GaussianBlur(image_scaled, image_blur, Size(0,0), 33, 33);
  divide(image_scaled, image_blur, image_blur, 255);

  // Binarization mask to convert text to 255 and everything else to 0
  Mat image_thresh;
  threshold(image_blur, image_thresh, 0, 255, THRESH_BINARY_INV + THRESH_OTSU);

  // Modified deskew code based on original code from
  // http://felix.abecassis.me/2011/10/opencv-rotation-deskewing/
  // Find all points in skewed image
  vector<Point> points;
  for (Mat_<uchar>::iterator it = image_thresh.begin<uchar>(); it != image_thresh.end<uchar>(); ++it) {
    if (*it) {
      points.push_back(it.pos());
    }
  }

  // Create largest bounding box and rotation matrix for skewed image
  RotatedRect rect = minAreaRect(Mat(points));
  Mat rot_mat = getRotationMatrix2D(rect.center, rect.angle < -45 ? rect.angle += 90 : rect.angle, 1);

  // Deskew image
  Mat image_rot;
  warpAffine(image_thresh, image_rot, rot_mat, image_thresh.size(), INTER_CUBIC);

  // Invert binarization mask so Tesseract can read it more accurately
  bitwise_not(image_rot, image_rot);

  // Modified Tesseract API code based on original examples from
  // https://medium.com/building-a-simple-text-correction-tool/basic-ocr-with-tesseract-and-opencv-34fae6ab3400
  // https://tesseract-ocr.github.io/tessdoc/APIExample.html
  // Initialize optical character recognition library
  TessBaseAPI* ocr = new TessBaseAPI();
  ocr->Init(NULL, "eng", OEM_LSTM_ONLY);
  ocr->SetImage(image_rot.data, image_rot.cols, image_rot.rows, 1, image_rot.step);
  ocr->Recognize(0);

  // Initialize iterator to detect each individual word
  ResultIterator* it = ocr->GetIterator();
  PageIteratorLevel level = RIL_WORD;

  // Initialize censored output image
  Mat image_censored = image_rot.clone();

  // Import censored words & convert into list
  string censoredWords = argv[2];
  vector<string> censorList;
  int current_pos = 0;
  while (1) {
    int space_pos = censoredWords.find(" ", current_pos);
    if (space_pos != string::npos) {
      string censoredWord = censoredWords.substr(current_pos, space_pos - current_pos);
      censoredWord = normalize(censoredWord);
      censorList.push_back(censoredWord);
      current_pos = space_pos + 1;
    } else {
      string censoredWord = censoredWords.substr(current_pos);
      censoredWord = normalize(censoredWord);
      censorList.push_back(censoredWord);
      break;
    }
  }

  // Print out censored words (DEBUG)
  /*
  for (int i = 0; i < censorList.size(); i++) {
    cout << censorList[i] << endl;
  }
  */

  // Go through every word detected
  if (it != 0) {
    do {
      // Detect bounding box coordinates from image using Tesseract
      string word = string(it->GetUTF8Text(level));
      int tlx, tly, brx, bry;
      it->BoundingBox(level, &tlx, &tly, &brx, &bry);

      // Print coordinate information (DEBUG)
      //  cout << "word: '" << word << "'; Coords: (" << tlx << "," << tly << "), (" << brx << ", " << bry << ") " << endl;

      word = normalize(word);

      // Draw censor box on word if it matches with list
      for (int i = 0; i < censorList.size(); i++) {
        if (censorList[i].compare(word) == 0) {
          rectangle(image_censored, Point(tlx, tly), Point(brx, bry), Scalar(0, 0, 0), FILLED);
        }
      }
    } while (it->Next(level));
  }

  // Display input and output images
  imshow("Input", image);
  imshow("Preprocessed", image_rot);
  imshow("Output", image_censored);
  waitKey(0);
  destroyWindow("Input");
  destroyWindow("Preprocessed");
  destroyWindow("Output");

  ocr->End();
  // Save output image
  imwrite(argv[3], image_censored);
  return 0;
}