|
16 | 16 |
|
17 | 17 | package org.embulk.guess.csv; |
18 | 18 |
|
| 19 | +import com.fasterxml.jackson.databind.JsonNode; |
19 | 20 | import com.fasterxml.jackson.databind.ObjectMapper; |
20 | 21 | import java.util.ArrayList; |
21 | 22 | import java.util.Arrays; |
@@ -190,9 +191,41 @@ private GuessedType guessType(final Object value) { |
190 | 191 | return null; |
191 | 192 | } |
192 | 193 |
|
| 194 | + // It was implemented as below when SchemaGuess was implemented with Ruby. |
| 195 | + // |
| 196 | + // begin |
| 197 | + // JSON.parse(str) |
| 198 | + // return "json" |
| 199 | + // rescue |
| 200 | + // end |
| 201 | + // |
| 202 | + // The 'json' gem 1.8.X raised JSON::ParserError by default because an older JSON RFC 4627 |
| 203 | + // accepted only an object or an array as its top-level value. |
| 204 | + // https://datatracker.ietf.org/doc/html/rfc4627#section-2 |
| 205 | + // |
| 206 | + // The 'json' gem 2.0+ started to accept any JSON value because a newer JSON RFC 7159 |
| 207 | + // removed the constraint that the top-level value be an object or an array. |
| 208 | + // https://datatracker.ietf.org/doc/html/rfc7159#section-2 |
| 209 | + // https://bugs.ruby-lang.org/issues/13070 |
| 210 | + // https://bugs.ruby-lang.org/issues/14054 |
| 211 | + // |
| 212 | + // Embulk up to v0.10.21 expected (embedded) JRuby 9.1.15.0, which bundled 'json' 1.8.X. |
| 213 | + // JSON.parse(str) here did not accept a quoted string such as '"example_string"'. |
| 214 | + // |
| 215 | + // (JFYI, JRuby 9.2+ bundles 'json' 2.0+. If a user used JRuby 9.2+ with Embulk v0.10.22+, |
| 216 | + // the schema guess should have behaved a little differently for a quoted string.) |
| 217 | + // |
| 218 | + // We replaced JSON.parse(str) with Jackson ObjectMapper#readTree(str) when reimplementing |
| 219 | + // the guess in Java. Jackson's ObjectMapper, however, follows the newer RFC. |
| 220 | + // |
| 221 | + // Therefore, we introduced an explicit check to accept only an object or an array so that: |
| 222 | + // 1) The guess stays compatible with older versions. |
| 223 | + // 2) The guess behaves more naturally -- a bare quoted string is naturally parsed as STRING. |
193 | 224 | try { |
194 | | - new ObjectMapper().readTree(str); |
195 | | - return GuessedType.JSON; |
| 225 | + final JsonNode node = new ObjectMapper().readTree(str); |
| 226 | + if (node.isContainerNode()) { |
| 227 | + return GuessedType.JSON; |
| 228 | + } |
196 | 229 | } catch (final Exception ex) { |
197 | 230 | // Pass-through. |
198 | 231 | } |
|
0 commit comments