From 91746e0f1e4c2feb20f08e1f7a205a490b584e8c Mon Sep 17 00:00:00 2001 From: Zeki Mokhtarzada Date: Thu, 23 Mar 2017 10:02:15 -0400 Subject: [PATCH 1/2] Fix a bug where sometimes read does not return a full record even though we are not at the end of the file causing the parser to crash. Instead, we read the next chunk and keep moving along. --- src/main/java/com/thebuzzmedia/cloudfront/LogParser.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/thebuzzmedia/cloudfront/LogParser.java b/src/main/java/com/thebuzzmedia/cloudfront/LogParser.java index 4c03f3c..dcc2daf 100644 --- a/src/main/java/com/thebuzzmedia/cloudfront/LogParser.java +++ b/src/main/java/com/thebuzzmedia/cloudfront/LogParser.java @@ -216,7 +216,12 @@ public void parse(InputStream stream, ILogParserCallback callback) int lfIndex = ArrayUtils.lastIndexOfNoCheck(LF, buffer, index, length); - if (lfIndex == -1) + if (lfIndex == -1 && length < buffer.length / 2) { + // read did not return enough. + System.out.println("reading more without processing, (read() returned less than one record)"); + index = length; + continue; + } else if (lfIndex == -1) { throw new MalformedContentException( "Could not find the \\n (LINE FEED) character after scanning " + length @@ -227,7 +232,7 @@ public void parse(InputStream stream, ILogParserCallback callback) + " bytes). The log file is likely malformed or a single log entry line is so long it won't fit easily into the current read buffer. 
Consider making the buffer bigger by adjust the " + BUFFER_SIZE_PROPERTY_NAME + " system property."); - + } // Decode the buffer contents up to our LF terminator char[] content = DecodingUtils.decode(buffer, DecodingUtils.ASCII_CHARSET, index, lfIndex + 1); From 7676bf1cfc573c0f91c53eaf78617f5198d545a2 Mon Sep 17 00:00:00 2001 From: Zeki Mokhtarzada Date: Sun, 23 Feb 2020 22:11:22 -0500 Subject: [PATCH 2/2] log a message when there are more fields than there are headers and ignore extra fields --- src/main/java/com/thebuzzmedia/cloudfront/LogParser.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/com/thebuzzmedia/cloudfront/LogParser.java b/src/main/java/com/thebuzzmedia/cloudfront/LogParser.java index dcc2daf..a84dedb 100644 --- a/src/main/java/com/thebuzzmedia/cloudfront/LogParser.java +++ b/src/main/java/com/thebuzzmedia/cloudfront/LogParser.java @@ -415,6 +415,13 @@ protected void parseLogEntry(char[] line, int index, int length, continue; } + if (valueIndex >= activeFieldIndices.size()) { + System.out.println("Error, line has more fields than it should at " + valueIndex + + " line: " + (new String(line)).substring(index, index+length)); + skippedFieldPositionSet.add(valueIndex); + continue; + } + // Value belonged to an active field, so store it. logEntryWrapper.setFieldValue(activeFieldIndices.get(valueIndex++) .intValue(), token.getValue());