-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTokenizer.hpp
More file actions
executable file
·73 lines (57 loc) · 1.73 KB
/
Tokenizer.hpp
File metadata and controls
executable file
·73 lines (57 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#ifndef _TOKENIZER_HPP
#define _TOKENIZER_HPP
#include <iostream>
#include <string>
#include <cctype>
#include "DataStructures/SinglyLinkedList.hpp"
/**
 * Splits a string of source text into tokens.
 *
 * Characters are grouped into four classes (see CharTypes). A token is one of:
 *   - a single bracket character: ( ) [ ] { }
 *   - a double-quoted string, including its quote characters
 *   - a maximal run of alphanumeric characters
 *   - a maximal run of "operator" characters (anything that is not
 *     whitespace, a bracket, or alphanumeric)
 * Whitespace separates tokens and is never emitted.
 */
class Tokenizer {
private:
    // Character classes used to group adjacent characters into tokens.
    enum CharTypes {
        WHITESPACE = 0, OPERATOR, BRACKET, ALPHANUMERIC
    };

    /**
     * Classifies a single character.
     *
     * @param c the character to classify
     * @return one of the CharTypes values: WHITESPACE (0), OPERATOR (1),
     *         BRACKET (2) or ALPHANUMERIC (3)
     */
    static int getCharType(char c) {
        switch (c) {
            case '(': case ')':
            case '[': case ']':
            case '{': case '}':
                return BRACKET;
            default:
                break;
        }
        // The <cctype> classifiers have undefined behaviour for negative
        // values other than EOF, so convert to unsigned char first.
        const unsigned char uc = static_cast<unsigned char>(c);
        if (std::isspace(uc)) {
            return WHITESPACE;
        }
        if (std::isalnum(uc)) {
            return ALPHANUMERIC;
        }
        return OPERATOR;
    }

public:
    /**
     * Tokenizes `base` and appends each token to `tokens`.
     *
     * Every token is allocated with `new std::string` and handed to
     * `tokens.add()`. NOTE(review): presumably the list (or its user) is
     * responsible for freeing these strings -- confirm against
     * SinglyLinkedList.
     *
     * Tokenizing stops as soon as a token containing "//" is produced; that
     * token and the rest of the input are discarded (line-comment handling).
     * Tokens added before that point are kept.
     *
     * An unterminated string literal yields a token that runs to the end of
     * the input and has no closing quote.
     *
     * NOTE(review): a '"' that directly follows other operator characters
     * (e.g. `a="x"`) is absorbed into the operator run rather than starting a
     * string token; this pre-existing behaviour is preserved here.
     *
     * @param base   text to tokenize (not modified)
     * @param tokens list that receives the tokens in input order
     */
    static void tokenize(std::string& base, SinglyLinkedList<std::string>& tokens) {
        const std::string::iterator end = base.end();
        std::string::iterator it = base.begin();

        while (it != end) {
            const int type = getCharType(*it);

            if (type == WHITESPACE) {
                // Skip the whole whitespace run; check bounds before dereferencing.
                while (it != end && getCharType(*it) == WHITESPACE) {
                    ++it;
                }
            } else if (type == BRACKET) {
                // Brackets are always single-character tokens.
                tokens.add(new std::string(1, *it));
                ++it;
            } else if (*it == '"') {
                // String literal: keep the opening quote, the contents, and
                // (if present) the closing quote.
                std::string token(1, *it);
                ++it;
                while (it != end && *it != '"') {
                    token.append(1, *it);
                    ++it;
                }
                if (it != end) {
                    token.append(1, *it);
                    ++it;
                }
                tokens.add(new std::string(token));
            } else {
                // Maximal run of characters of the same class (alphanumeric
                // or operator). Built on the stack so that the early return
                // below cannot leak.
                std::string token;
                while (it != end && getCharType(*it) == type) {
                    token.append(1, *it);
                    ++it;
                }
                if (token.find("//") != std::string::npos) {
                    return;
                }
                tokens.add(new std::string(token));
            }
        }
    }
};
#endif //_TOKENIZER_HPP