Skip to content

Commit d43b456

Browse files
committed
PDFBOX-5992: skip either a line break (CR, LF or CRLF) or any one-byte whitespace at the beginning of an inline image
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1925467 13f79535-47bb-0310-9956-ffa450edef68
1 parent 09dbd9c commit d43b456

File tree

2 files changed

+46
-14
lines changed

2 files changed

+46
-14
lines changed

pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java

+43-12
Original file line numberDiff line numberDiff line change
@@ -402,9 +402,9 @@ else if (value instanceof COSInteger && !((COSInteger) value).isValid())
402402
}
403403

404404
/**
405-
* Skip the upcoming CRLF or LF which are supposed to follow a stream.
405+
* Skip the upcoming CRLF or LF which are supposed to follow a stream. Trailing spaces are removed as well.
406406
*
407-
* @throws IOException
407+
* @throws IOException if something went wrong
408408
*/
409409
protected void skipWhiteSpaces() throws IOException
410410
{
@@ -418,24 +418,55 @@ protected void skipWhiteSpaces() throws IOException
418418
{
419419
whitespace = source.read();
420420
}
421+
if (!skipLinebreak(whitespace))
422+
{
423+
source.rewind(1);
424+
}
425+
}
421426

422-
if (isCR(whitespace))
427+
/**
428+
* Skip one line break, such as CR, LF or CRLF.
429+
*
430+
* @return true if a line break was found and removed.
431+
*
432+
* @throws IOException if something went wrong
433+
*/
434+
protected boolean skipLinebreak() throws IOException
435+
{
436+
// a line break is a CR, an LF or a CRLF
437+
if (!skipLinebreak(source.read()))
423438
{
424-
whitespace = source.read();
425-
if (!isLF(whitespace))
439+
source.rewind(1);
440+
return false;
441+
}
442+
return true;
443+
}
444+
445+
/**
446+
* Skip one line break, such as CR, LF or CRLF.
447+
*
448+
* @param linebreak the first character to be checked.
449+
*
450+
* @return true if a line break was found and removed.
451+
*
452+
* @throws IOException if something went wrong
453+
*/
454+
private boolean skipLinebreak(int linebreak) throws IOException
455+
{
456+
// a line break is a CR, an LF or a CRLF
457+
if (isCR(linebreak))
458+
{
459+
int next = source.read();
460+
if (!isLF(next))
426461
{
427462
source.rewind(1);
428-
//The spec says this is invalid but it happens in the real
429-
//world so we must support it.
430463
}
431464
}
432-
else if (!isLF(whitespace))
465+
else if (!isLF(linebreak))
433466
{
434-
//we are in an error.
435-
//but again we will do a lenient parsing and just assume that everything
436-
//is fine
437-
source.rewind(1);
467+
return false;
438468
}
469+
return true;
439470
}
440471

441472
/**

pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -273,9 +273,10 @@ else if( next.equals( "false" ) )
273273
"' at stream offset " + currentPosition);
274274
}
275275
ByteArrayOutputStream imageData = new ByteArrayOutputStream();
276-
if( isWhitespace() )
276+
// skip one line break (CR, LF or CRLF) or any one-byte whitespace
277+
if (!skipLinebreak() && isWhitespace())
277278
{
278-
//pull off the whitespace character
279+
// pull off the whitespace character
279280
source.read();
280281
}
281282
int lastByte = source.read();

0 commit comments

Comments
 (0)