From e85f221064044fb049195ef201bf05a6c3e6bedc Mon Sep 17 00:00:00 2001 From: Nick Date: Sun, 31 Mar 2019 18:11:19 -0400 Subject: [PATCH 1/4] Enable binary decoding. There are a lot of fiddly details about transiting between different data encodings. And the QR spec is a bit vague on some parts. I've detailed the problems at the sister project https://github.com/nu-book/zxing-cpp/issues/62 I've decided to patch this in a works-for-me kind of way: unmarked encodings are treated as binary and if that's not good enough the higher layers have the source material and can try decoding it themselves. --- core/src/zxing/common/StringUtils.cpp | 9 +++++- .../qrcode/decoder/DecodedBitStreamParser.cpp | 29 +++++++++++++------ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/core/src/zxing/common/StringUtils.cpp b/core/src/zxing/common/StringUtils.cpp index 30f9325..e5cea76 100644 --- a/core/src/zxing/common/StringUtils.cpp +++ b/core/src/zxing/common/StringUtils.cpp @@ -25,7 +25,9 @@ using namespace zxing::common; // N.B.: these are the iconv strings for at least some versions of iconv -char const* const StringUtils::PLATFORM_DEFAULT_ENCODING = "UTF-8"; +char const* const StringUtils::PLATFORM_DEFAULT_ENCODING = "ISO8859-1"; + // This default is an 8-bit fixed-width encoding and it's identical with + // unicode in its range, which reduces accidental mangling during decode. 
char const* const StringUtils::ASCII = "ASCII"; char const* const StringUtils::SHIFT_JIS = "SHIFT_JIS"; char const* const StringUtils::GB2312 = "GBK"; @@ -76,6 +78,11 @@ StringUtils::guessEncoding(char* bytes, int length, int value = bytes[i] & 0xFF; + // embedded nuls are a sure sign of binary data + if(value == '\x00') { + canBeISO88591 = canBeShiftJIS = canBeUTF8 = false; + } + // UTF-8 stuff if (canBeUTF8) { if (utf8BytesLeft > 0) { diff --git a/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp b/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp index 1d36bba..40f5c32 100644 --- a/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp +++ b/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp @@ -67,6 +67,12 @@ void DecodedBitStreamParser::append(std::string &result, return; } + if(src == NULL) { + // don't try to recode un-encoded data. + result.append((const char *)bufIn, nIn); + return; + } + iconv_t cd = iconv_open(StringUtils::UTF8, src); if (cd == (iconv_t)-1) { result.append((const char *)bufIn, nIn); @@ -193,19 +199,24 @@ void DecodedBitStreamParser::decodeByteSegment(Ref bits_, for (int i = 0; i < count; i++) { readBytes[i] = (char) bits.readBits(8); } - string encoding; - if (currentCharacterSetECI == 0) { - // The spec isn't clear on this mode; see - // section 6.4.5: t does not say which encoding to assuming - // upon decoding. I have seen ISO-8859-1 used as well as - // Shift_JIS -- without anything like an ECI designator to - // give a hint. - encoding = StringUtils::guessEncoding(readBytes, count, hints); + const char* encoding = NULL; + if (currentCharacterSetECI == NULL) { + // The spec says + // 8.3.1: The default interpretation for QR Code is ECI 000020 representing the JIS8 and Shift JIS character sets. + // 8.4.4: In [8-bit Byte Mode], one 8 bit codeword directly represents the JIS8 character [...]. + // In ECIs other than the default ECI, it represents an 8-bit byte value directly. 
+ // If I'm reading that right, *if* the character set is unspecified *or* explicitly set to Shift-JIS, + // *then* use JIS8 but *otherwise* don't try to decode the value. + // + // That's a stupid spec. + // Instead we follow qrencode: + // unspecified ECI <=> unmolested binary + encoding = NULL; } else { encoding = currentCharacterSetECI->name(); } try { - append(result, readBytes, nBytes, encoding.c_str()); + append(result, readBytes, nBytes, encoding); } catch (ReaderException const& ignored) { (void)ignored; throw FormatException(); From d6266b0fa1a12367cc7c18dd045171ad67530e25 Mon Sep 17 00:00:00 2001 From: Nick Date: Sun, 31 Mar 2019 18:42:22 -0400 Subject: [PATCH 2/4] Test binary data decoding --- .travis.yml | 8 +++++++- tests/amen-01.bin | Bin 0 -> 857 bytes tests/amen-01.png | Bin 0 -> 2457 bytes tests/test.sh | 10 ++++++++++ 4 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 tests/amen-01.bin create mode 100644 tests/amen-01.png create mode 100755 tests/test.sh diff --git a/.travis.yml b/.travis.yml index 8dae417..db82257 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,2 +1,8 @@ language: cpp -script: mkdir build && cd build && cmake -G "Unix Makefiles" .. && make +script: + - mkdir build + - cd build + - cmake -G "Unix Makefiles" .. 
+ - make + - export PATH=$(pwd):$PATH + - ../tests/test.sh diff --git a/tests/amen-01.bin b/tests/amen-01.bin new file mode 100644 index 0000000000000000000000000000000000000000..b30e1538b806ea7d6da1bdf29c7ad4b18b8206ef GIT binary patch literal 857 zcmezWx#0){3`At+r32agKy1UnpzxZ3g_l=QP*hSRZC0P(9qP>!p6qI!Oh#- z-#<7kEGjB4BcrIOw6e0cwy~w9qoZfSgeg;I%$_}e{^I4!*R0vFb?eTZdk-EwcI?#I zvllO3y?OKQ-A7NKzI^%ika z3>qMJScLw61Ck&D{Qn$qgh67q+!97-4F`7&?2HJJ)uQWdLf7ZmFHFhc8m>)AAam!$o%m0{y)#H|9=u_`Mjq^QOil7 zMUnHX<-OuL&n?gWKUe7Bc;JT}2a{y|r!E#oMFs%}PR6u8B?UKQEunh1|DP8e5ps(W z+i-Q}0fzczOdC`g7A{CV&A`>gRP#9Rr-4=}OKYj4=lbm5$Y;)<04 z%aXrN*>ohNT;t(mu4$h)-My&z^XM73DmG))tRz9z=+|iy!4-e(KA(DY?Xlrmr?|9K z@xCoHRqm^P%}v)`yK8FVD%opYW*TLZ^(&uFnw4?ZWRszP{G_Z+(wnB$itSWCcIM1# zsozuGQ*T7{dN2RkvS`+Kb6>fg;jd1uy%0XZOZ}wrt7feXrK;oOf$xUGDSnmn#eptt%}&{Vv@%?d#cxYXg(B=Dqd1 z|6cFj?r&dpj!W^ZiJY1~ZFk)CpGou9MqlN6r=Bn=h?}EF=GEswlldtb^3P6fl$)7Y z_kX<-XW)(m=7(I*|9^Hkq9Wf_b%gu8149SXrUfe;#5h?FJr=viK2^ym)OWd!+%-$3 z*Ckz*ul=?^-Zg9ItQ|AoT;IKWcIz;CPme@-~UAiw(B0iHuHGY|1b6f8LsrqyG)a$l}l*ARNe5Dq)@K&1)TJoX-2aIvFYP b>h0Vso3+jbU0Ct$=85(c-)~&NiG6zYdcXd4PL*Jo3|V_P*|p z=JM;y$1BuTTv`A#m^{F#?&kQm`Z+PolFwCh9OouZ>oAw&6o=WmY{1r*=Ukh&>S*@4 zyzI14GCkd7&#ByV0=sC;P@Gt$y7$c%)^{+o^74y?H6u_fQ}?cU1->r_>SAhkUa>BY z^>XX@`x4#?R6Wl1|Gj=I+KDu!X2D;O2N;iMYfcXoMzdN7wSzqEeo1MSEX zF;%p}`cf}8n_r4Nz{{5}7Xh*?#dJOl!lU+2KGYj)LT!GE;(j0{-%)yoyp-d-JUmOZ zbQHo-x1oL>XVRZ6z<8`%h|EOq&nN3(%fmHH&luM@G16L1nFqKdFHT(!;kAC#6$!}p}vQAAp^#%1yPHf^ko~GOu)_Y7#%BL&`%KQ_KpFr!-1DUT~8Wf$ODX! 
zjibvMCht=Yv;xAuGDG|V8g%<1soQ}tJx=ZIn2ctSo>Py=4k+pc$kN3oW&#Gtm35Gx z>P>#iTy2=gqoEQXo(7G%fM%79TXK#xpJT{0A5D-bfl8Uxqd9;v%|}bO$;~zUL51>B zycLp;iC!;+Hxuw+?h$sJBLuLtA{&aLd3iZ389I$U3vjbIMpXgCgN#3vPLN!NDLQO< z@+E-#fztCJPqEJ`gczHVDe*)EG!}7_kIn?#`2Uc?6f?wwysuG}2=B67O*~|^Vjf_O zbO4QBZlNSMP?BT?*{et_L+hBQFZW~s!Y+)CNVMYP;sNt9YNH`8O^iw2GXa?xTJ2bh zgVGRcP!yJj8l|^9Y=%5QCLWb5MC~Ho2i${ujc_n>hs2@k&6o!m)%RE@W{9Ow5T`F` z%5GM7R+EaQ$OGIQV`P9BRcf#t^=3m5-)w;odimB5?PUN$Im)~1g+ZnqCOIkIWL%@V za3wevd4M$cIAIl=PZUGh-X88}o=PP%m!>KYFmaD1Te0+x!i3|&>O<){h4~p{24G+s zIVS7JKi&=82%X9V zco|&e**l7YP%gFUl@!ShUy`~V7*pbZ`E~s6@hAvVkJz_D^CS_d@&HLlq_J*lqJlU9 z5#nDUp#WD%NoUh1qH+RUx&-%S&$=w(hiF>ywKN@NW5Lx!@t~WV1wd zmj}og9}pXii*J&oB(<5-={N3Xh8xaIz|EI19}s#Xd05nJR(FYkTEbkc zxqz|3vORPTCRK;K`8eq!_8z)PoL zL~tgi(G1O{HtFX@n|$H31r;*^qjLZ9@MoVWP~Go8*M(9P)}@l=nFok}(GJJK16G51 zqB-~m`d3uNT#Vl>pmXiq_W)yd4P1PldUroSK;aF&L{H!rV4A(|jZ*4cq$>sq@Ey((|bRiZ(;Bf7Uaoel#$ePn`;%d&mi)@N7S%B#*PDenkE}(s`Ww9DM=l?|H;@2kN-M~#WA5#`d zTOfpQZDVUs_f7Xl-8k6u0CoRSC+fO-xkpoh$WbbSn=8}h$8T1+ADD{0nA5#K`{Y8M zKo%k-5uTPS8&Ij}XkNO$ftb^I*j1dA&gELKOLKchV5#*-<#a8sQAUYrebRLj-66^Z zR0`41MrE=^d&%NmT~ks?I(?|f15CHM70WE}l6mjb=pI3b-tWW*dmi8)e*FXf^}zoC X+<1mML3pG`00000NkvXXu0mjf%$SdI literal 0 HcmV?d00001 diff --git a/tests/test.sh b/tests/test.sh new file mode 100755 index 0000000..4c89596 --- /dev/null +++ b/tests/test.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -ex + +cd "$(dirname "$0")"; + +for file in $(ls *.png); do + #zxing has a --test-mode but it's not working for binary files + zxing "$file" | diff $(ls "${file%.*}".{txt,bin} 2>/dev/null) - +done From cf29c473743c1a5f9b6b44b6a81afc619727e299 Mon Sep 17 00:00:00 2001 From: Nick Date: Sun, 31 Mar 2019 18:57:07 -0400 Subject: [PATCH 3/4] Tidy tests --- tests/test.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test.sh b/tests/test.sh index 4c89596..3c7e825 100755 --- a/tests/test.sh +++ b/tests/test.sh @@ -1,10 +1,10 @@ #!/bin/bash -set -ex cd "$(dirname "$0")"; -for file in $(ls 
*.png); do +find . -name "*.png" | while read file; do #zxing has a --test-mode but it's not working for binary files - zxing "$file" | diff $(ls "${file%.*}".{txt,bin} 2>/dev/null) - + # perl is here because in textmode, zxing appends a newline, but a lot of test files don't have that + zxing "$file" | perl -pe 'chomp if eof' | diff -u "$(ls "${file%.*}".{txt,bin} 2>/dev/null)" - done From a306fe97763088f62d851b52fd7dceb66e3ea187 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 2 Apr 2019 10:30:51 -0400 Subject: [PATCH 4/4] Regenerate expected test output I think I'd generated my expected output from zxing itself. This was generated from the source file with curl https://sampleswap.org//samples-ghost/DRUM%20LOOPS%20and%20BREAKS/161%20to%20180%20bpm/128[kb]161_amenvar3.aif.mp3 | head -c 856 | tee ../tests/amen-01.bin 856 is the packet size qrencode decided to chunk this file into when I first split it up. --- tests/amen-01.bin | Bin 857 -> 856 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/amen-01.bin b/tests/amen-01.bin index b30e1538b806ea7d6da1bdf29c7ad4b18b8206ef..74d21bf88df2909df25853883bb103342bac14f7 100644 GIT binary patch delta 7 Ocmcb~c7tt01Tz2)Edt2^ delta 9 Qcmcb?c9U&G1T!NS020Xp)Bpeg