Skip to content

Commit ac5cf8a

Browse files
committed
make it possible to pass char buffer to simplecpp
1 parent 5f9039e commit ac5cf8a

File tree

8 files changed

+193
-27
lines changed

8 files changed

+193
-27
lines changed

externals/simplecpp/simplecpp.cpp

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,42 @@ class StdIStream : public simplecpp::TokenList::Stream {
377377
std::istream &istr;
378378
};
379379

380+
class StdCharBufStream : public simplecpp::TokenList::Stream {
381+
public:
382+
// cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members
383+
StdCharBufStream(const unsigned char* str, std::size_t size)
384+
: str(str)
385+
, size(size)
386+
, pos(0)
387+
, lastStatus(0)
388+
{
389+
init();
390+
}
391+
392+
virtual int get() OVERRIDE {
393+
if (pos >= size)
394+
return lastStatus = EOF;
395+
return str[pos++];
396+
}
397+
virtual int peek() OVERRIDE {
398+
if (pos >= size)
399+
return lastStatus = EOF;
400+
return str[pos];
401+
}
402+
virtual void unget() OVERRIDE {
403+
--pos;
404+
}
405+
virtual bool good() OVERRIDE {
406+
return lastStatus != EOF;
407+
}
408+
409+
private:
410+
const unsigned char *str;
411+
const std::size_t size;
412+
std::size_t pos;
413+
int lastStatus;
414+
};
415+
380416
class FileStream : public simplecpp::TokenList::Stream {
381417
public:
382418
// cppcheck-suppress uninitDerivedMemberVar - we call Stream::init() to initialize the private members
@@ -442,6 +478,20 @@ simplecpp::TokenList::TokenList(std::istream &istr, std::vector<std::string> &fi
442478
readfile(stream,filename,outputList);
443479
}
444480

481+
/**
 * Generates a token list from a buffer of unsigned bytes.
 *
 * The buffer is wrapped in a StdCharBufStream which is then handed to readfile().
 */
simplecpp::TokenList::TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList)
    : frontToken(nullptr)
    , backToken(nullptr)
    , files(filenames)
{
    StdCharBufStream bufferStream(data, size);
    readfile(bufferStream, filename, outputList);
}
487+
488+
/**
 * Generates a token list from a buffer of plain chars.
 *
 * The chars are reinterpreted as unsigned bytes, wrapped in a StdCharBufStream
 * and handed to readfile().
 */
simplecpp::TokenList::TokenList(const char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList)
    : frontToken(nullptr)
    , backToken(nullptr)
    , files(filenames)
{
    const unsigned char* const bytes = reinterpret_cast<const unsigned char*>(data);
    StdCharBufStream bufferStream(bytes, size);
    readfile(bufferStream, filename, outputList);
}
494+
445495
simplecpp::TokenList::TokenList(const std::string &filename, std::vector<std::string> &filenames, OutputList *outputList)
446496
: frontToken(nullptr), backToken(nullptr), files(filenames)
447497
{
@@ -1447,8 +1497,7 @@ namespace simplecpp {
14471497

14481498
Macro(const std::string &name, const std::string &value, std::vector<std::string> &f) : nameTokDef(nullptr), files(f), tokenListDefine(f), valueDefinedInCode_(false) {
14491499
const std::string def(name + ' ' + value);
1450-
std::istringstream istr(def);
1451-
StdIStream stream(istr);
1500+
StdCharBufStream stream(reinterpret_cast<const unsigned char*>(def.data()), def.size());
14521501
tokenListDefine.readfile(stream);
14531502
if (!parseDefine(tokenListDefine.cfront()))
14541503
throw std::runtime_error("bad macro syntax. macroname=" + name + " value=" + value);

externals/simplecpp/simplecpp.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,10 @@ namespace simplecpp {
198198
explicit TokenList(std::vector<std::string> &filenames);
199199
/** generates a token list from the given std::istream parameter */
200200
TokenList(std::istream &istr, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr);
201+
/** generates a token list from the given buffer */
202+
TokenList(const unsigned char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr);
203+
/** generates a token list from the given buffer */
204+
TokenList(const char* data, std::size_t size, std::vector<std::string> &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr);
201205
/** generates a token list from the given filename parameter */
202206
TokenList(const std::string &filename, std::vector<std::string> &filenames, OutputList *outputList = nullptr);
203207
TokenList(const TokenList &other);

gui/mainwindow.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,10 @@ void MainWindow::analyzeCode(const QString& code, const QString& filename)
649649
checkLockDownUI();
650650
clearResults();
651651
mUI->mResults->checkingStarted(1);
652-
cppcheck.check(filename.toStdString(), code.toStdString());
652+
{
653+
const std::string code_s = code.toStdString();
654+
cppcheck.check(filename.toStdString(), reinterpret_cast<const std::uint8_t*>(code_s.data()), code_s.size());
655+
}
653656
analysisDone();
654657

655658
// Expand results

lib/cppcheck.cpp

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -547,10 +547,9 @@ unsigned int CppCheck::check(const std::string &path)
547547
return checkFile(Path::simplifyPath(path), emptyString);
548548
}
549549

550-
unsigned int CppCheck::check(const std::string &path, const std::string &content)
550+
unsigned int CppCheck::check(const std::string &path, const uint8_t* data, std::size_t size)
551551
{
552-
std::istringstream iss(content);
553-
return checkFile(Path::simplifyPath(path), emptyString, &iss);
552+
return checkBuffer(Path::simplifyPath(path), emptyString, data, size);
554553
}
555554

556555
unsigned int CppCheck::check(const FileSettings &fs)
@@ -590,15 +589,41 @@ unsigned int CppCheck::check(const FileSettings &fs)
590589
return returnValue;
591590
}
592591

593-
static simplecpp::TokenList createTokenList(const std::string& filename, std::vector<std::string>& files, simplecpp::OutputList* outputList, std::istream* fileStream)
592+
unsigned int CppCheck::checkBuffer(const std::string& filename, const std::string &cfgname, const uint8_t* data, std::size_t size)
594593
{
595-
if (fileStream)
596-
return {*fileStream, files, filename, outputList};
594+
return checkInternal(filename, cfgname,
595+
[&filename, data, size](TokenList& list) {
596+
list.createTokens(data, size, filename);
597+
},
598+
[&filename, data, size](std::vector<std::string>& files, simplecpp::OutputList* outputList) {
599+
return simplecpp::TokenList{data, size, files, filename, outputList};
600+
});
601+
}
602+
603+
unsigned int CppCheck::checkStream(const std::string& filename, const std::string &cfgname, std::istream& fileStream)
604+
{
605+
return checkInternal(filename, cfgname,
606+
[&filename, &fileStream](TokenList& list) {
607+
list.createTokens(fileStream, filename);
608+
},
609+
[&filename, &fileStream](std::vector<std::string>& files, simplecpp::OutputList* outputList) {
610+
return simplecpp::TokenList{fileStream, files, filename, outputList};
611+
});
612+
}
597613

598-
return {filename, files, outputList};
614+
unsigned int CppCheck::checkFile(const std::string& filename, const std::string &cfgname)
615+
{
616+
return checkInternal(filename, cfgname,
617+
[&filename](TokenList& list) {
618+
std::ifstream in(filename);
619+
list.createTokens(in, filename);
620+
},
621+
[&filename](std::vector<std::string>& files, simplecpp::OutputList* outputList) {
622+
return simplecpp::TokenList{filename, files, outputList};
623+
});
599624
}
600625

601-
unsigned int CppCheck::checkFile(const std::string& filename, const std::string &cfgname, std::istream* fileStream)
626+
unsigned int CppCheck::checkInternal(const std::string& filename, const std::string &cfgname, const CreateTokensFn& createTokens, const CreateTokenListFn& createTokenList)
602627
{
603628
// TODO: move to constructor when CppCheck no longer owns the settings
604629
if (mSettings.checks.isEnabled(Checks::unusedFunction) && !mUnusedFunctionsCheck)
@@ -644,21 +669,15 @@ unsigned int CppCheck::checkFile(const std::string& filename, const std::string
644669
// this is not a real source file - we just want to tokenize it. treat it as C anyways as the language needs to be determined.
645670
Tokenizer tokenizer(mSettings, *this);
646671
tokenizer.list.setLang(Standards::Language::C);
647-
if (fileStream) {
648-
tokenizer.list.createTokens(*fileStream, filename);
649-
}
650-
else {
651-
std::ifstream in(filename);
652-
tokenizer.list.createTokens(in, filename);
653-
}
672+
createTokens(tokenizer.list);
654673
mUnusedFunctionsCheck->parseTokens(tokenizer, mSettings);
655674
}
656675
return EXIT_SUCCESS;
657676
}
658677

659678
simplecpp::OutputList outputList;
660679
std::vector<std::string> files;
661-
simplecpp::TokenList tokens1 = createTokenList(filename, files, &outputList, fileStream);
680+
simplecpp::TokenList tokens1 = createTokenList(files, &outputList);
662681

663682
// If there is a syntax error, report it and stop
664683
const auto output_it = std::find_if(outputList.cbegin(), outputList.cend(), [](const simplecpp::Output &output){

lib/cppcheck.h

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@ class CheckUnusedFunctions;
4646
class Tokenizer;
4747
class FileWithDetails;
4848

49-
namespace simplecpp { class TokenList; }
49+
namespace simplecpp {
50+
class TokenList;
51+
struct Output;
52+
}
5053

5154
/// @addtogroup Core
5255
/// @{
@@ -96,12 +99,13 @@ class CPPCHECKLIB CppCheck : ErrorLogger {
9699
* the disk but the content is given in @p data. In errors the @p path
97100
* is used as a filename.
98101
* @param path Path to the file to check.
99-
* @param content File content as a string.
102+
* @param data File content as a buffer.
103+
* @param size Size of buffer.
100104
* @return amount of errors found or 0 if none were found.
101105
* @note You must set settings before calling this function (by calling
102106
* settings()).
103107
*/
104-
unsigned int check(const std::string &path, const std::string &content);
108+
unsigned int check(const std::string &path, const uint8_t* data, std::size_t size);
105109

106110
/**
107111
* @brief Get reference to current settings.
@@ -176,7 +180,40 @@ class CPPCHECKLIB CppCheck : ErrorLogger {
176180
* @param fileStream stream the file content can be read from
177181
* @return number of errors found
178182
*/
179-
unsigned int checkFile(const std::string& filename, const std::string &cfgname, std::istream* fileStream = nullptr);
183+
unsigned int checkStream(const std::string& filename, const std::string &cfgname, std::istream& fileStream);
184+
185+
186+
/**
187+
* @brief Check a file
188+
* @param filename the file name to be read from
189+
* @param cfgname cfg name
190+
* @return number of errors found
191+
*/
192+
unsigned int checkFile(const std::string& filename, const std::string &cfgname);
193+
194+
/**
195+
* @brief Check a file using buffer
196+
* @param filename file name
197+
* @param cfgname cfg name
198+
* @param data the data to be read
199+
* @param size the size of the data to be read
200+
* @return number of errors found
201+
*/
202+
unsigned int checkBuffer(const std::string& filename, const std::string &cfgname, const uint8_t* data, std::size_t size);
203+
204+
using CreateTokensFn = std::function<void (TokenList&)>;
205+
// TODO: should use simplecpp::OutputList
206+
using CreateTokenListFn = std::function<simplecpp::TokenList(std::vector<std::string>&, std::list<simplecpp::Output>*)>;
207+
208+
/**
209+
* @brief Check a file using the given token creation callbacks
210+
* @param filename file name
211+
* @param cfgname cfg name
212+
* @param createTokens a function to create the tokens with
213+
* @param createTokenList a function to create the TokenList with
214+
* @return number of errors found
215+
*/
216+
unsigned int checkInternal(const std::string& filename, const std::string &cfgname, const CreateTokensFn& createTokens, const CreateTokenListFn& createTokenList);
180217

181218
/**
182219
* @brief Check normal tokens

lib/tokenlist.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,17 @@ bool TokenList::createTokens(std::istream &code, const std::string& file0)
345345

346346
//---------------------------------------------------------------------------
347347

348+
bool TokenList::createTokens(const uint8_t* data, size_t size, const std::string& file0)
349+
{
350+
ASSERT_LANG(!file0.empty());
351+
352+
appendFileIfNew(file0);
353+
354+
return createTokensInternal(data, size, file0);
355+
}
356+
357+
//---------------------------------------------------------------------------
358+
348359
bool TokenList::createTokens(std::istream &code, Standards::Language lang)
349360
{
350361
ASSERT_LANG(lang != Standards::Language::None);
@@ -359,6 +370,20 @@ bool TokenList::createTokens(std::istream &code, Standards::Language lang)
359370

360371
//---------------------------------------------------------------------------
361372

373+
bool TokenList::createTokens(const uint8_t* data, size_t size, Standards::Language lang)
374+
{
375+
ASSERT_LANG(lang != Standards::Language::None);
376+
if (mLang == Standards::Language::None) {
377+
mLang = lang;
378+
} else {
379+
ASSERT_LANG(lang == mLang);
380+
}
381+
382+
return createTokensInternal(data, size, "");
383+
}
384+
385+
//---------------------------------------------------------------------------
386+
362387
bool TokenList::createTokensInternal(std::istream &code, const std::string& file0)
363388
{
364389
simplecpp::OutputList outputList;
@@ -371,6 +396,18 @@ bool TokenList::createTokensInternal(std::istream &code, const std::string& file
371396

372397
//---------------------------------------------------------------------------
373398

399+
bool TokenList::createTokensInternal(const uint8_t* data, size_t size, const std::string& file0)
400+
{
401+
simplecpp::OutputList outputList;
402+
simplecpp::TokenList tokens(data, size, mFiles, file0, &outputList);
403+
404+
createTokens(std::move(tokens));
405+
406+
return outputList.empty();
407+
}
408+
409+
//---------------------------------------------------------------------------
410+
374411
// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
375412
void TokenList::createTokens(simplecpp::TokenList&& tokenList)
376413
{

lib/tokenlist.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,23 @@ class CPPCHECKLIB TokenList {
105105
* @param file0 source file name
106106
*/
107107
bool createTokens(std::istream &code, const std::string& file0);
108+
bool createTokens(const uint8_t* data, size_t size, const std::string& file0);
109+
bool createTokens(const char* data, size_t size, const std::string& file0) {
110+
return createTokens(reinterpret_cast<const uint8_t*>(data), size, file0);
111+
}
112+
template<size_t size>
113+
bool createTokens(const char (&data)[size], const std::string& file0) {
114+
return createTokens(reinterpret_cast<const uint8_t*>(data), size-1, file0);
115+
}
108116
bool createTokens(std::istream &code, Standards::Language lang);
117+
bool createTokens(const uint8_t* data, size_t size, Standards::Language lang);
118+
bool createTokens(const char* data, size_t size, Standards::Language lang) {
119+
return createTokens(reinterpret_cast<const uint8_t*>(data), size, lang);
120+
}
121+
template<size_t size>
122+
bool createTokens(const char (&data)[size], Standards::Language lang) {
123+
return createTokens(reinterpret_cast<const uint8_t*>(data), size-1, lang);
124+
}
109125

110126
void createTokens(simplecpp::TokenList&& tokenList);
111127

@@ -204,6 +220,7 @@ class CPPCHECKLIB TokenList {
204220
void determineCppC();
205221

206222
bool createTokensInternal(std::istream &code, const std::string& file0);
223+
bool createTokensInternal(const uint8_t* data, std::size_t size, const std::string& file0);
207224

208225
/** Token list */
209226
TokensFrontBack mTokensFrontBack;

oss-fuzz/main.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,12 @@ class DummyErrorLogger : public ErrorLogger {
3838

3939
static DummyErrorLogger s_errorLogger;
4040

41-
static void doCheck(const std::string& code)
41+
static void doCheck(const uint8_t *data, size_t dataSize)
4242
{
4343
CppCheck cppcheck(s_errorLogger, false, nullptr);
4444
cppcheck.settings().addEnabled("all");
4545
cppcheck.settings().certainty.setEnabled(Certainty::inconclusive, true);
46-
cppcheck.check("test.cpp", code);
46+
cppcheck.check("test.cpp", data, dataSize);
4747
}
4848

4949
#ifndef NO_FUZZ
@@ -53,7 +53,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t dataSize)
5353
{
5454
if (dataSize < 10000) {
5555
const std::string code = generateCode2(data, dataSize);
56-
doCheck(code);
56+
doCheck(reinterpret_cast<const unsigned char*>(code.data()), code.size());
5757
}
5858
return 0;
5959
}
@@ -77,7 +77,7 @@ int main(int argc, char * argv[])
7777

7878
const std::string code = oss.str();
7979
for (int i = 0; i < cnt; ++i)
80-
doCheck(code);
80+
doCheck(reinterpret_cast<const unsigned char*>(code.data()), code.size());
8181

8282
return EXIT_SUCCESS;
8383
}

0 commit comments

Comments
 (0)