From b99a4717045ca33aaa49118a040163eb1c535b40 Mon Sep 17 00:00:00 2001 From: Gennis Emerson Date: Fri, 15 Dec 2017 17:17:23 -0800 Subject: [PATCH 1/4] First support for secondary metadata query. Add support for a secondary SQL query to retrieve metadata from a second table. This first implementation supports a model in which metadata is stored in a second table in one or more rows keyed by some unique value from the original doc record. Three new config parameters are used: db.extraMetadataSql (to specify the query), db.extraMetadataSqlParameters (to list the column names whose values are used as parameters in db.extraMetadataSql), and optionally db.extraMetadataColumns (to specify the columns from the metadata result set to use). The addMetadata/addMetadataTo* methods were refactored a bit to allow addMetadata to be used for the extraMetadata table as well. The current implementation requires the key into the metadata table to be part of the doc's unique key, but this is not necessarily a requirement to support retrieving metadata from a second table. The config parameters and new methods use "extraMetadata" in their names to identify them as applying to this secondary query. The prior existence of "db.metadataColumns" as a parameter applying to the primary doc query made new parameter names starting with "db.metadata*" feel confusing. A better naming scheme might be nice. This implementation is expected to be extended with support for other metadata structures, possibly in a manner similar to the ResponseGenerator, so that should be taken into account when thinking about config parameter names. --- .../adaptor/database/DatabaseAdaptor.java | 117 +++++++++++-- .../adaptor/database/UniqueKey.java | 33 +++- .../adaptor/database/DatabaseAdaptorTest.java | 157 ++++++++++++++++++ .../adaptor/database/UniqueKeyTest.java | 35 ++++ 4 files changed, 326 insertions(+), 16 deletions(-) diff --git a/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java b/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java index c0539cf..4496cca 100644 --- a/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java +++ b/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java @@ -128,6 +128,8 @@ private static boolean isNullOrEmptyString(String str) { private String everyDocIdSql; private String singleDocContentSql; private MetadataColumns metadataColumns; + private String extraMetadataSql; + private MetadataColumns extraMetadataColumns; private ResponseGenerator respGenerator; private String aclSql; private String aclPrincipalDelimiter; @@ -150,6 +152,9 @@ public void initConfig(Config config) { // when set to true, if "db.metadataColumns" is blank, it will use all // returned columns as metadata. config.addKey("db.includeAllColumnsAsMetadata", "false"); + config.addKey("db.extraMetadataSql", ""); + config.addKey("db.extraMetadataSqlParameters", ""); + config.addKey("db.extraMetadataColumns", ""); config.addKey("db.modeOfOperation", null); config.addKey("db.updateSql", ""); config.addKey("db.aclSql", ""); @@ -234,6 +239,20 @@ public void init(AdaptorContext context) throws Exception { } } + if (!isNullOrEmptyString(cfg.getValue("db.extraMetadataSql"))) { + extraMetadataSql = cfg.getValue("db.extraMetadataSql"); + log.config("extra metadata sql: " + extraMetadataSql); + String extraMetadataColumnsConfig = + cfg.getValue("db.extraMetadataColumns"); + if ("".equals(extraMetadataColumnsConfig)) { + extraMetadataColumns = null; + log.config("extra metadata columns: Use all columns in ResultSet"); + } else { + extraMetadataColumns = new MetadataColumns(extraMetadataColumnsConfig); + log.config("extra metadata columns: " + extraMetadataColumns); + } + } + modeOfOperation = cfg.getValue("db.modeOfOperation"); log.config("mode of operation: " + modeOfOperation); @@ -311,7 +330,9 @@ public void init(AdaptorContext context) throws Exception { = new UniqueKey.Builder(cfg.getValue("db.uniqueKey")) .setDocIdIsUrl(docIdIsUrl) .setContentSqlColumns(cfg.getValue("db.singleDocContentSqlParameters")) - .setAclSqlColumns(cfg.getValue("db.aclSqlParameters")); + .setAclSqlColumns(cfg.getValue("db.aclSqlParameters")) + .setMetadataSqlColumns( + cfg.getValue("db.extraMetadataSqlParameters")); // Verify all column names. try (Connection conn = makeNewConnection()) { @@ -332,6 +353,10 @@ public void init(AdaptorContext context) throws Exception { "db.metadataColumns", metadataColumns.keySet()); } } + if (extraMetadataColumns != null) { + verifyColumnNames(conn, "db.extraMetadataSql", extraMetadataSql, + "db.extraMetadataColumns", extraMetadataColumns.keySet()); + } if (respGenerator instanceof ResponseGenerator.SingleColumnContent) { ResponseGenerator.SingleColumnContent content = (ResponseGenerator.SingleColumnContent) respGenerator; @@ -477,6 +502,9 @@ public void getDocIds(DocIdPusher pusher) throws IOException, builder.setDeleteFromIndex(true); } else if ("urlAndMetadataLister".equals(modeOfOperation)) { addMetadataToRecordBuilder(builder, rs); + if (extraMetadataSql != null) { + addExtraMetadataToRecordBuilder(builder, conn, id.getUniqueId()); + } } DocIdPusher.Record record = builder.build(); log.log(Level.FINEST, "doc id: {0}", id); @@ -501,15 +529,10 @@ private interface MetadataHandler { /* * Adds all specified metadata columns to the record or response being built. */ - private void addMetadata(MetadataHandler meta, ResultSet rs) - throws SQLException, IOException { + private void addMetadata(MetadataHandler meta, ResultSet rs, + MetadataColumns rsMetadataColumns) throws SQLException, IOException { ResultSetMetaData rsMetaData = rs.getMetaData(); - synchronized (this) { - if (metadataColumns == null) { - metadataColumns = new MetadataColumns(rsMetaData); - } - } - for (Map.Entry entry : metadataColumns.entrySet()) { + for (Map.Entry entry : rsMetadataColumns.entrySet()) { int index; try { index = rs.findColumn(entry.getKey()); @@ -633,25 +656,81 @@ private void addMetadata(MetadataHandler meta, ResultSet rs) @VisibleForTesting void addMetadataToRecordBuilder(final DocIdPusher.Record.Builder builder, ResultSet rs) throws SQLException, IOException { + synchronized (this) { + if (metadataColumns == null) { + metadataColumns = new MetadataColumns(rs.getMetaData()); + } + } addMetadata( new MetadataHandler() { @Override public void addMetadata(String k, String v) { builder.addMetadata(k, v); } }, - rs); + rs, metadataColumns); } @VisibleForTesting void addMetadataToResponse(final Response resp, ResultSet rs) throws SQLException, IOException { + synchronized (this) { + if (metadataColumns == null) { + metadataColumns = new MetadataColumns(rs.getMetaData()); + } + } addMetadata( new MetadataHandler() { @Override public void addMetadata(String k, String v) { resp.addMetadata(k, v); } }, - rs); + rs, metadataColumns); + } + + private void addExtraMetadataToRecordBuilder( + final DocIdPusher.Record.Builder builder, Connection conn, + String uniqueId) throws SQLException, IOException { + + try (PreparedStatement stmt = getExtraMetadataFromDb(conn, uniqueId); + ResultSet rs = stmt.executeQuery()) { + synchronized (this) { + if (extraMetadataColumns == null) { + extraMetadataColumns = new MetadataColumns(rs.getMetaData()); + } + } + MetadataHandler handler = new MetadataHandler() { + @Override public void addMetadata(String k, String v) { + builder.addMetadata(k, v); + } + }; + while (rs.next()) { + addMetadata(handler, rs, extraMetadataColumns); + } + } catch (SQLException ex) { + throw new IOException(ex); + } + } + + private void addExtraMetadataToResponse(final Response resp, Connection conn, + String uniqueId) throws SQLException, IOException { + try (PreparedStatement stmt = getExtraMetadataFromDb(conn, uniqueId); + ResultSet rs = stmt.executeQuery()) { + synchronized (this) { + if (extraMetadataColumns == null) { + extraMetadataColumns = new MetadataColumns(rs.getMetaData()); + } + } + MetadataHandler handler = new MetadataHandler() { + @Override public void addMetadata(String k, String v) { + resp.addMetadata(k, v); + } + }; + while (rs.next()) { + addMetadata(handler, rs, extraMetadataColumns); + } + } catch (SQLException ex) { + throw new IOException(ex); + } } /** Gives the bytes of a document referenced with id. */ @@ -675,6 +754,9 @@ public void getDocContent(Request req, Response resp) throws IOException { } // Generate response metadata first. addMetadataToResponse(resp, rs); + if (extraMetadataSql != null) { + addExtraMetadataToResponse(resp, conn, id.getUniqueId()); + } // Generate Acl if aclSql is provided. if (aclSql != null) { resp.setAcl(getAcl(conn, id.getUniqueId())); @@ -838,6 +920,14 @@ private PreparedStatement getStreamFromDb(Connection conn, return st; } + private PreparedStatement getExtraMetadataFromDb(Connection conn, + String uniqueId) throws SQLException { + PreparedStatement st = conn.prepareStatement(extraMetadataSql); + uniqueKey.setMetadataSqlValues(st, uniqueId); + log.log(Level.FINER, "about to get extra metadata: {0}", uniqueId); + return st; + } + /** * Mechanism that accepts stream of DocIdPusher.Record instances, buffers * them, and sends them when it has accumulated maximum allowed amount per @@ -921,7 +1011,7 @@ static ResponseGenerator loadResponseGenerator(Config config) { throw new InvalidConfigurationException(errmsg, ex); } } - + @VisibleForTesting static ResponseGenerator loadResponseGeneratorInternal(Method method, Map config) { @@ -1026,6 +1116,9 @@ public void getModifiedDocIds(DocIdPusher pusher) builder.setDeleteFromIndex(true); } else if ("urlAndMetadataLister".equals(modeOfOperation)) { addMetadataToRecordBuilder(builder, rs); + if (extraMetadataSql != null) { + addExtraMetadataToRecordBuilder(builder, conn, id.getUniqueId()); + } } DocIdPusher.Record record = builder.build(); log.log(Level.FINEST, "doc id: {0}", id); diff --git a/src/com/google/enterprise/adaptor/database/UniqueKey.java b/src/com/google/enterprise/adaptor/database/UniqueKey.java index 8488f86..c6c5341 100644 --- a/src/com/google/enterprise/adaptor/database/UniqueKey.java +++ b/src/com/google/enterprise/adaptor/database/UniqueKey.java @@ -55,15 +55,17 @@ static enum ColumnType { private final Map types; // types of DocId columns private final List contentSqlCols; // columns for content query private final List aclSqlCols; // columns for acl query + private final List metadataSqlCols; // columns for metadata query private final boolean docIdIsUrl; private UniqueKey(List docIdSqlCols, Map types, List contentSqlCols, List aclSqlCols, - boolean docIdIsUrl) { + List metadataSqlCols, boolean docIdIsUrl) { this.docIdSqlCols = docIdSqlCols; this.types = types; this.contentSqlCols = contentSqlCols; this.aclSqlCols = aclSqlCols; + this.metadataSqlCols = metadataSqlCols; this.docIdIsUrl = docIdIsUrl; } @@ -171,6 +173,11 @@ void setAclSqlValues(PreparedStatement st, String uniqueId) setSqlValues(st, uniqueId, aclSqlCols); } + void setMetadataSqlValues(PreparedStatement st, String uniqueId) + throws SQLException { + setSqlValues(st, uniqueId, metadataSqlCols); + } + @VisibleForTesting static String encodeSlashInData(String data) { if (-1 == data.indexOf('/') && -1 == data.indexOf('_')) { @@ -217,8 +224,8 @@ static String decodeSlashInData(String id) { @Override public String toString() { - return "UniqueKey(" + docIdSqlCols + "," + types + "," + contentSqlCols + "," - + aclSqlCols + ")"; + return "UniqueKey(" + docIdSqlCols + "," + types + "," + + contentSqlCols + "," + aclSqlCols + "," + metadataSqlCols + ")"; } /** Builder to create instances of {@code UniqueKey}. */ @@ -227,6 +234,7 @@ static class Builder { private Map types; // types of DocId columns private List contentSqlCols; // columns for content query private List aclSqlCols; // columns for acl query + private List metadataSqlCols; // columns for metadata query private boolean docIdIsUrl = false; /** @@ -345,6 +353,19 @@ Builder setAclSqlColumns(String aclSqlColumns) return this; } + Builder setMetadataSqlColumns(String metadataSqlColumns) + throws InvalidConfigurationException { + if (null == metadataSqlColumns) { + throw new NullPointerException(); + } + if (!metadataSqlColumns.trim().isEmpty()) { + this.metadataSqlCols = + splitIntoNameList("db.extraMetadataSqlParameters", + metadataSqlColumns, types.keySet()); + } + return this; + } + private static List splitIntoNameList(String paramConfig, String cols, Set validNames) { List columnNames = new ArrayList(); @@ -434,6 +455,10 @@ List getAclSqlColumns() { return aclSqlCols; } + List getMetadataSqlColumns() { + return metadataSqlCols; + } + /** Return the Map of column types. */ @VisibleForTesting Map getColumnTypes() { @@ -464,7 +489,7 @@ UniqueKey build() throws InvalidConfigurationException { + " The key must be a single string column when docId.isUrl=true."); } return new UniqueKey(docIdSqlCols, types, contentSqlCols, aclSqlCols, - docIdIsUrl); + metadataSqlCols, docIdIsUrl); } } } diff --git a/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java b/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java index d297e84..0703173 100644 --- a/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java +++ b/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java @@ -1404,6 +1404,45 @@ public void testInitVerifyColumnNames_metadataColumnAliasBad() getObjectUnderTest(moreEntries); } + @Test + public void testInitVerifyColumnNames_metadataSql_columnInUniqueKey() + throws Exception { + executeUpdate("create table data(id int, other varchar(20))"); + + Map moreEntries = new HashMap(); + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("db.uniqueKey", "id:int"); + moreEntries.put("db.extraMetadataSql", + "select other from data where id = ?"); + moreEntries.put("db.extraMetadataSqlParameters", "id"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select id from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where id = ?"); + getObjectUnderTest(moreEntries); + } + + @Test + public void testInitVerifyColumnNames_metadataSql_columnNotInUniqueKey() + throws Exception { + executeUpdate("create table data(id int, other varchar(20))"); + + Map moreEntries = new HashMap(); + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("db.uniqueKey", "id:int"); + moreEntries.put("db.extraMetadataSql", + "select other from data where id = ?"); + moreEntries.put("db.extraMetadataSqlParameters", "other"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select id from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where id = ?"); + thrown.expect(InvalidConfigurationException.class); + thrown.expectMessage("Unknown column 'other' from " + + "db.extraMetadataSqlParameters not found in db.uniqueKey: [id]"); + getObjectUnderTest(moreEntries); + } + @Test public void testInitVerifyColumnNames_modeOfOperation() throws Exception { executeUpdate("create table data(id int, other varchar(20))"); @@ -2643,6 +2682,124 @@ public void testMetadataColumns_columnAlias() throws Exception { assertEquals(expected, response.getMetadata()); } + @Test + public void testExtraMetadata_docAndExtra() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.everyDocIdSql", "select * from data"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.metadataColumns", "name"); + configEntries.put("db.extraMetadataSql", + "select other from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("name", "Doe, John"); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + // metadata comes from both the document row and the extra table + assertEquals(expected, response.getMetadata()); + } + + @Test + public void testExtraMetadata_onlyExtra() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.everyDocIdSql", "select * from data"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + // extraMetadataSql uses "select " to limit the + // columns returned as metadata + configEntries.put("db.extraMetadataSql", + "select other from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + assertEquals(expected, response.getMetadata()); + } + + @Test + public void testExtraMetadata_extraMetadataColumns() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.everyDocIdSql", "select * from data"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + // extraMetadataSql uses "select *", so extraMetadataColumns + // is used to limit the columns returned as metadata + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + configEntries.put("db.extraMetadataColumns", "OTHER"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + assertEquals(expected, response.getMetadata()); + } + + @Test + public void testExtraMetadata_invalidExtraMetadataColumns() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.everyDocIdSql", "select * from data"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + configEntries.put("db.extraMetadataColumns", "INVALID"); + + thrown.expect(InvalidConfigurationException.class); + thrown.expectMessage("These columns from db.extraMetadataColumns [INVALID]" + + " not found in query db.extraMetadataSql:" + + " select * from metadata where id = ?"); + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + } + @Test public void testGetDocContentAcl() throws Exception { executeUpdate("create table data(id integer)"); diff --git a/test/com/google/enterprise/adaptor/database/UniqueKeyTest.java b/test/com/google/enterprise/adaptor/database/UniqueKeyTest.java index c4713ba..d6be3ce 100644 --- a/test/com/google/enterprise/adaptor/database/UniqueKeyTest.java +++ b/test/com/google/enterprise/adaptor/database/UniqueKeyTest.java @@ -94,6 +94,12 @@ public void testNullAclCols() { new UniqueKey.Builder("num:int").setAclSqlColumns(null); } + @Test + public void testNullMetadataCols() { + thrown.expect(NullPointerException.class); + new UniqueKey.Builder("num:int").setMetadataSqlColumns(null); + } + @Test public void testSingleInt() { UniqueKey.Builder builder = new UniqueKey.Builder("numnum:int"); @@ -187,6 +193,15 @@ public void testUnknownAclCol() { .setAclSqlColumns("numnum,IsStranger,strstr"); } + @Test + public void testUnknownMetadataCol() { + thrown.expect(InvalidConfigurationException.class); + thrown.expectMessage( + "Unknown column 'IsStranger' from db.extraMetadataSqlParameters"); + new UniqueKey.Builder("numnum:int,strstr:string") + .setMetadataSqlColumns("numnum,IsStranger,strstr"); + } + @Test public void testAddColumnTypes() { Map sqlTypes = new HashMap<>(); @@ -721,4 +736,24 @@ public void testSpacesBetweenAclSqlCols() { assertEquals(goldenNames, testBuilder.getAclSqlColumns()); } } + + @Test + public void testSpacesBetweenMetadataSqlCols() { + String ukDecl = "numnum:int,strstr:string"; + UniqueKey.Builder builder = new UniqueKey.Builder(ukDecl) + .setMetadataSqlColumns("numnum,numnum,strstr,numnum,strstr"); + List goldenNames = builder.getMetadataSqlColumns(); + + List testDecls = asList( + "numnum ,numnum,strstr,numnum,strstr", + "numnum, numnum,strstr,numnum,strstr", + "numnum , numnum,strstr,numnum,strstr", + "numnum , numnum,strstr,numnum,strstr", + "numnum , numnum , strstr , numnum,strstr"); + for (String colDecl : testDecls) { + UniqueKey.Builder testBuilder = new UniqueKey.Builder(ukDecl) + .setMetadataSqlColumns(colDecl); + assertEquals(goldenNames, testBuilder.getMetadataSqlColumns()); + } + } } From 567df68e5cc60662686813cd41741c7d9954cde6 Mon Sep 17 00:00:00 2001 From: Gennis Emerson Date: Tue, 19 Dec 2017 15:43:42 -0800 Subject: [PATCH 2/4] Code review changes. --- .../adaptor/database/DatabaseAdaptor.java | 66 ++++--- .../adaptor/database/UniqueKey.java | 30 +-- .../adaptor/database/DatabaseAdaptorTest.java | 175 +++++++++++++++++- 3 files changed, 225 insertions(+), 46 deletions(-) diff --git a/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java b/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java index 4496cca..170bb97 100644 --- a/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java +++ b/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java @@ -239,18 +239,25 @@ public void init(AdaptorContext context) throws Exception { } } - if (!isNullOrEmptyString(cfg.getValue("db.extraMetadataSql"))) { + if (!cfg.getValue("db.extraMetadataSql").isEmpty()) { extraMetadataSql = cfg.getValue("db.extraMetadataSql"); log.config("extra metadata sql: " + extraMetadataSql); String extraMetadataColumnsConfig = cfg.getValue("db.extraMetadataColumns"); - if ("".equals(extraMetadataColumnsConfig)) { + if (extraMetadataColumnsConfig.isEmpty()) { extraMetadataColumns = null; log.config("extra metadata columns: Use all columns in ResultSet"); } else { extraMetadataColumns = new MetadataColumns(extraMetadataColumnsConfig); log.config("extra metadata columns: " + extraMetadataColumns); } + } else { + if (!cfg.getValue("db.extraMetadataColumns").isEmpty()) { + ignored.add("db.extraMetadataColumns"); + } + if (!cfg.getValue("db.extraMetadataSqlParameters").isEmpty()) { + ignored.add("db.extraMetadataSqlParameters"); + } } modeOfOperation = cfg.getValue("db.modeOfOperation"); @@ -277,6 +284,15 @@ public void init(AdaptorContext context) throws Exception { } else if (!metadataColumns.isEmpty()) { ignored.add("db.metadataColumns"); } + if (!cfg.getValue("db.extraMetadataSql").isEmpty()) { + ignored.add("db.extraMetadataSql"); + } + if (!cfg.getValue("db.extraMetadataColumns").isEmpty()) { + ignored.add("db.extraMetadataColumns"); + } + if (!cfg.getValue("db.extraMetadataSqlParameters").isEmpty()) { + ignored.add("db.extraMetadataSqlParameters"); + } } } else if (singleDocContentSql.isEmpty()) { throw new InvalidConfigurationException( @@ -530,9 +546,9 @@ private interface MetadataHandler { * Adds all specified metadata columns to the record or response being built. */ private void addMetadata(MetadataHandler meta, ResultSet rs, - MetadataColumns rsMetadataColumns) throws SQLException, IOException { + MetadataColumns columns) throws SQLException, IOException { ResultSetMetaData rsMetaData = rs.getMetaData(); - for (Map.Entry entry : rsMetadataColumns.entrySet()) { + for (Map.Entry entry : columns.entrySet()) { int index; try { index = rs.findColumn(entry.getKey()); @@ -690,41 +706,33 @@ void addMetadataToResponse(final Response resp, ResultSet rs) private void addExtraMetadataToRecordBuilder( final DocIdPusher.Record.Builder builder, Connection conn, String uniqueId) throws SQLException, IOException { - - try (PreparedStatement stmt = getExtraMetadataFromDb(conn, uniqueId); - ResultSet rs = stmt.executeQuery()) { - synchronized (this) { - if (extraMetadataColumns == null) { - extraMetadataColumns = new MetadataColumns(rs.getMetaData()); + MetadataHandler handler = new MetadataHandler() { + @Override public void addMetadata(String k, String v) { + builder.addMetadata(k, v); } - } - MetadataHandler handler = new MetadataHandler() { - @Override public void addMetadata(String k, String v) { - builder.addMetadata(k, v); - } - }; - while (rs.next()) { - addMetadata(handler, rs, extraMetadataColumns); - } - } catch (SQLException ex) { - throw new IOException(ex); - } + }; + addExtraMetadata(conn, uniqueId, handler); } private void addExtraMetadataToResponse(final Response resp, Connection conn, String uniqueId) throws SQLException, IOException { - try (PreparedStatement stmt = getExtraMetadataFromDb(conn, uniqueId); + MetadataHandler handler = new MetadataHandler() { + @Override public void addMetadata(String k, String v) { + resp.addMetadata(k, v); + } + }; + addExtraMetadata(conn, uniqueId, handler); + } + + private void addExtraMetadata(Connection conn, String uniqueId, + MetadataHandler handler) throws SQLException, IOException { + try (PreparedStatement stmt = prepareExtraMetadataStatement(conn, uniqueId); ResultSet rs = stmt.executeQuery()) { synchronized (this) { if (extraMetadataColumns == null) { extraMetadataColumns = new MetadataColumns(rs.getMetaData()); } } - MetadataHandler handler = new MetadataHandler() { - @Override public void addMetadata(String k, String v) { - resp.addMetadata(k, v); - } - }; while (rs.next()) { addMetadata(handler, rs, extraMetadataColumns); } @@ -920,7 +928,7 @@ private PreparedStatement getStreamFromDb(Connection conn, return st; } - private PreparedStatement getExtraMetadataFromDb(Connection conn, + private PreparedStatement prepareExtraMetadataStatement(Connection conn, String uniqueId) throws SQLException { PreparedStatement st = conn.prepareStatement(extraMetadataSql); uniqueKey.setMetadataSqlValues(st, uniqueId); diff --git a/src/com/google/enterprise/adaptor/database/UniqueKey.java b/src/com/google/enterprise/adaptor/database/UniqueKey.java index c6c5341..d2ce3f6 100644 --- a/src/com/google/enterprise/adaptor/database/UniqueKey.java +++ b/src/com/google/enterprise/adaptor/database/UniqueKey.java @@ -116,18 +116,22 @@ String makeUniqueId(ResultSet rs) throws SQLException, URISyntaxException { private void setSqlValues(PreparedStatement st, String uniqueId, List sqlCols) throws SQLException { - // parse on / that isn't preceded by escape char _ - // (a / that is preceded by _ is part of column value) - String parts[] = uniqueId.split("(? zip = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - for (int i = 0; i < parts.length; i++) { - String columnValue = decodeSlashInData(parts[i]); - zip.put(docIdSqlCols.get(i), columnValue); + if (docIdIsUrl) { + zip.put(docIdSqlCols.get(0), uniqueId); + } else { + // parse on / that isn't preceded by escape char _ + // (a / that is preceded by _ is part of column value) + String parts[] = uniqueId.split("(? moreEntries = new HashMap(); + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("docId.isUrl", "true"); + moreEntries.put("db.uniqueKey", "url:string"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select url from data"); + // Ignored properties in this mode. + moreEntries.put("db.extraMetadataSql", "select * from metadata"); + moreEntries.put("db.extraMetadataSqlParameters", "url"); + moreEntries.put("db.extraMetadataColumns", "other"); + + List messages = new ArrayList(); + captureLogMessages(DatabaseAdaptor.class, "will be ignored", messages); + getObjectUnderTest(moreEntries); + assertEquals(messages.toString(), 1, messages.size()); + assertThat(messages.get(0), + containsString("[db.extraMetadataColumns, db.extraMetadataSql," + + " db.extraMetadataSqlParameters]")); + } + @Test public void testInitLister_ignoredProperties_allColumns() throws Exception { executeUpdate("create table data(url varchar(200), other varchar(200))"); @@ -1145,6 +1171,28 @@ public void testInitLister_ignoredProperties_allColumns() throws Exception { containsString("[db.includeAllColumnsAsMetadata]")); } + @Test + public void testInit_ignoredPropertyes_extraMetadata() throws Exception { + executeUpdate("create table data(id int, other varchar(20))"); + + Map moreEntries = new HashMap(); + moreEntries.put("db.extraMetadataSql", ""); + moreEntries.put("db.extraMetadataSqlParameters", "id"); + moreEntries.put("db.extraMetadataColumns", "id"); + // Required for validation, but not specific to this test. + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("db.uniqueKey", "id:int"); + moreEntries.put("db.everyDocIdSql", "select id from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where id = ?"); + List messages = new ArrayList(); + captureLogMessages(DatabaseAdaptor.class, "will be ignored", messages); + getObjectUnderTest(moreEntries); + assertEquals(messages.toString(), 1, messages.size()); + assertThat(messages.get(0), containsString( + "[db.extraMetadataColumns, db.extraMetadataSqlParameters]")); + } + @Test public void testInitEmptyQuery_singleDocContentSql() throws Exception { executeUpdate("create table data(id int, other varchar(20))"); @@ -1566,6 +1614,44 @@ public void testGetDocIds_urlAndMetadataLister() throws Exception { pusher.getRecords()); } + @Test + public void testGetDocIds_urlAndMetadataLister_extraMetadata() + throws Exception { + executeUpdate("create table data(url varchar(200), name varchar(20))"); + executeUpdate( + "insert into data(url, name) values('http://localhost/','John')"); + executeUpdate( + "create table metadata(url varchar(200), other varchar(200))"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost/', 'John 1')"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost/', 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "url:string"); + configEntries.put("db.everyDocIdSql", "select * from data"); + configEntries.put("db.modeOfOperation", "urlAndMetadataLister"); + configEntries.put("docId.isUrl", "true"); + configEntries.put("db.metadataColumns", "URL:col1, NAME:col2"); + configEntries.put("db.extraMetadataSql", + "select other from metadata where url = ?"); + configEntries.put("db.extraMetadataSqlParameters", "url"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + RecordingDocIdPusher pusher = new RecordingDocIdPusher(); + adaptor.getDocIds(pusher); + + Metadata metadata = new Metadata(); + metadata.add("col1", "http://localhost/"); + metadata.add("col2", "John"); + metadata.add("OTHER", "John 1"); + metadata.add("OTHER", "John 2"); + assertEquals( + Arrays.asList(new Record.Builder(new DocId("http://localhost/")) + .setMetadata(metadata).build()), + pusher.getRecords()); + } + @Test public void testGetDocIds_docIdIsUrl() throws Exception { // Add time to show the records as modified. @@ -2124,6 +2210,50 @@ public void testGetModifiedDocIds_urlAndMetadataLister() throws Exception { pusher.getRecords()); } + @Test + public void testGetModifiedDocIds_urlAndMetadataLister_extraMetadata() + throws Exception { + // Add time to show the records as modified. + executeUpdate("create table data(url varchar(20)," + + " other varchar(20), ts timestamp)"); + executeUpdate("insert into data(url, other, ts) values ('http://localhost'," + + " 'hello world', " + nowPlus(1) + ")"); + executeUpdate( + "create table metadata(url varchar(200), other varchar(200))"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost', 'John 1')"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost', 'John 2')"); + + Map moreEntries = new HashMap(); + moreEntries.put("db.modeOfOperation", "urlAndMetadataLister"); + moreEntries.put("docId.isUrl", "true"); + moreEntries.put("db.uniqueKey", "url:string"); + moreEntries.put("db.updateSql", + "select url, other from data where ts >= ?"); + moreEntries.put("db.metadataColumns", "other"); + moreEntries.put("db.extraMetadataSql", + "select other as metadataother from metadata where url = ?"); + moreEntries.put("db.extraMetadataSqlParameters", "url"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select url, other from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where url = ?"); + + PollingIncrementalLister lister = getPollingIncrementalLister(moreEntries); + RecordingDocIdPusher pusher = new RecordingDocIdPusher(); + lister.getModifiedDocIds(pusher); + + Metadata metadata = new Metadata(); + metadata.add("other", "hello world"); + metadata.add("METADATAOTHER", "John 1"); + metadata.add("METADATAOTHER", "John 2"); + assertEquals( + Arrays.asList( + new Record.Builder(new DocId("http://localhost")) + .setMetadata(metadata).setCrawlImmediately(true).build()), + pusher.getRecords()); + } @Test public void testGetModifiedDocIds_disableStreaming() throws Exception { @@ -2692,7 +2822,6 @@ public void testExtraMetadata_docAndExtra() throws Exception { Map configEntries = new HashMap(); configEntries.put("db.uniqueKey", "id:int"); - configEntries.put("db.everyDocIdSql", "select * from data"); configEntries.put("db.singleDocContentSql", "select id, name from data where id = ?"); configEntries.put("db.modeOfOperation", "rowToText"); @@ -2700,6 +2829,8 @@ public void testExtraMetadata_docAndExtra() throws Exception { configEntries.put("db.extraMetadataSql", "select other from metadata where id = ?"); configEntries.put("db.extraMetadataSqlParameters", "id"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); MockRequest request = new MockRequest(new DocId("1001")); @@ -2723,7 +2854,6 @@ public void testExtraMetadata_onlyExtra() throws Exception { Map configEntries = new HashMap(); configEntries.put("db.uniqueKey", "id:int"); - configEntries.put("db.everyDocIdSql", "select * from data"); configEntries.put("db.singleDocContentSql", "select id, name from data where id = ?"); configEntries.put("db.modeOfOperation", "rowToText"); @@ -2732,6 +2862,8 @@ public void testExtraMetadata_onlyExtra() throws Exception { configEntries.put("db.extraMetadataSql", "select other from metadata where id = ?"); configEntries.put("db.extraMetadataSqlParameters", "id"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); MockRequest request = new MockRequest(new DocId("1001")); @@ -2753,7 +2885,6 @@ public void testExtraMetadata_extraMetadataColumns() throws Exception { Map configEntries = new HashMap(); configEntries.put("db.uniqueKey", "id:int"); - configEntries.put("db.everyDocIdSql", "select * from data"); configEntries.put("db.singleDocContentSql", "select id, name from data where id = ?"); configEntries.put("db.modeOfOperation", "rowToText"); @@ -2763,6 +2894,8 @@ public void testExtraMetadata_extraMetadataColumns() throws Exception { "select * from metadata where id = ?"); configEntries.put("db.extraMetadataSqlParameters", "id"); configEntries.put("db.extraMetadataColumns", "OTHER"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); MockRequest request = new MockRequest(new DocId("1001")); @@ -2775,7 +2908,8 @@ public void testExtraMetadata_extraMetadataColumns() throws Exception { } @Test - public void testExtraMetadata_invalidExtraMetadataColumns() throws Exception { + public void testExtraMetadata_extraMetadataColumns_Mapping() + throws Exception { executeUpdate("create table data(id int, name varchar(200))"); executeUpdate("create table metadata(id int, other varchar(200))"); executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); @@ -2784,7 +2918,38 @@ public void testExtraMetadata_invalidExtraMetadataColumns() throws Exception { Map configEntries = new HashMap(); configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + // extraMetadataSql uses "select *", so extraMetadataColumns + // is used to limit the columns returned as metadata + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + configEntries.put("db.extraMetadataColumns", "other:lowercasename"); + // Required for validation, but not specific to this test. configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("lowercasename", "John 1"); + expected.add("lowercasename", "John 2"); + assertEquals(expected, response.getMetadata()); + } + + @Test + public void testExtraMetadata_invalidExtraMetadataColumns() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); configEntries.put("db.singleDocContentSql", "select id, name from data where id = ?"); configEntries.put("db.modeOfOperation", "rowToText"); @@ -2792,6 +2957,8 @@ public void testExtraMetadata_invalidExtraMetadataColumns() throws Exception { "select * from metadata where id = ?"); configEntries.put("db.extraMetadataSqlParameters", "id"); configEntries.put("db.extraMetadataColumns", "INVALID"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); thrown.expect(InvalidConfigurationException.class); thrown.expectMessage("These columns from db.extraMetadataColumns [INVALID]" From 40f13df7c0b14682fd60ad34b2d789c367dfbedc Mon Sep 17 00:00:00 2001 From: Gennis Emerson Date: Thu, 4 Jan 2018 11:43:53 -0800 Subject: [PATCH 3/4] Code review changes. --- .../adaptor/database/DatabaseAdaptor.java | 24 +-- .../adaptor/database/UniqueKey.java | 12 +- .../adaptor/database/DatabaseAdaptorTest.java | 193 +++++++++++++++++- 3 files changed, 210 insertions(+), 19 deletions(-) diff --git a/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java b/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java index 170bb97..9f77cfe 100644 --- a/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java +++ b/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java @@ -706,22 +706,22 @@ void addMetadataToResponse(final Response resp, ResultSet rs) private void addExtraMetadataToRecordBuilder( final DocIdPusher.Record.Builder builder, Connection conn, String uniqueId) throws SQLException, IOException { - MetadataHandler handler = new MetadataHandler() { - @Override public void addMetadata(String k, String v) { - builder.addMetadata(k, v); - } - }; - addExtraMetadata(conn, uniqueId, handler); + addExtraMetadata(conn, uniqueId, + new MetadataHandler() { + @Override public void addMetadata(String k, String v) { + builder.addMetadata(k, v); + } + }); } private void addExtraMetadataToResponse(final Response resp, Connection conn, String uniqueId) throws SQLException, IOException { - MetadataHandler handler = new MetadataHandler() { - @Override public void addMetadata(String k, String v) { - resp.addMetadata(k, v); - } - }; - addExtraMetadata(conn, uniqueId, handler); + addExtraMetadata(conn, uniqueId, + new MetadataHandler() { + @Override public void addMetadata(String k, String v) { + resp.addMetadata(k, v); + } + }); } private void addExtraMetadata(Connection conn, String uniqueId, diff --git a/src/com/google/enterprise/adaptor/database/UniqueKey.java b/src/com/google/enterprise/adaptor/database/UniqueKey.java index d2ce3f6..cfe21f3 100644 --- a/src/com/google/enterprise/adaptor/database/UniqueKey.java +++ b/src/com/google/enterprise/adaptor/database/UniqueKey.java @@ -117,6 +117,15 @@ String makeUniqueId(ResultSet rs) throws SQLException, URISyntaxException { private void setSqlValues(PreparedStatement st, String uniqueId, List sqlCols) throws SQLException { Map zip = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); + // Parameters to SQL queries are taken from the unique id. When reading extraMetadata + // when docIdIsUrl is true, we're now passing the unique id to this method for the + // first time. Don't try to split the id; we know it must be a single unique string. + + // TODO(aptls): while this may be OK in that we don't want to try to split the id when + // we know it's a URL, the underlying use case of using value(s) from the unique id to + // retrieve the extra metadata might not want to have to use this URL as the metadata + // key. We want to change this to be able to retrieve the metadata key value(s) from + // the doc id or content result set instead of the key. if (docIdIsUrl) { zip.put(docIdSqlCols.get(0), uniqueId); } else { @@ -294,6 +303,7 @@ static class Builder { docIdSqlCols = Collections.unmodifiableList(tmpNames); contentSqlCols = docIdSqlCols; aclSqlCols = docIdSqlCols; + metadataSqlCols = docIdSqlCols; } /** @@ -359,7 +369,7 @@ Builder setAclSqlColumns(String aclSqlColumns) Builder setMetadataSqlColumns(String metadataSqlColumns) throws InvalidConfigurationException { - if (null == metadataSqlColumns) { + if (metadataSqlColumns == null) { throw new NullPointerException(); } if (!metadataSqlColumns.trim().isEmpty()) { diff --git a/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java b/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java index f87cb6a..5b82460 100644 --- a/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java +++ b/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java @@ -1172,7 +1172,7 @@ public void testInitLister_ignoredProperties_allColumns() throws Exception { } @Test - public void testInit_ignoredPropertyes_extraMetadata() throws Exception { + public void testInitExtraMetadata_ignoredProperties() throws Exception { executeUpdate("create table data(id int, other varchar(20))"); Map moreEntries = new HashMap(); @@ -1628,10 +1628,10 @@ public void testGetDocIds_urlAndMetadataLister_extraMetadata() + " values ('http://localhost/', 'John 2')"); Map configEntries = new HashMap(); + configEntries.put("docId.isUrl", "true"); + configEntries.put("db.modeOfOperation", "urlAndMetadataLister"); configEntries.put("db.uniqueKey", "url:string"); configEntries.put("db.everyDocIdSql", "select * from data"); - configEntries.put("db.modeOfOperation", "urlAndMetadataLister"); - configEntries.put("docId.isUrl", "true"); configEntries.put("db.metadataColumns", "URL:col1, NAME:col2"); configEntries.put("db.extraMetadataSql", "select other from metadata where url = ?"); @@ -1652,6 +1652,47 @@ public void testGetDocIds_urlAndMetadataLister_extraMetadata() pusher.getRecords()); } + @Test + public void testGetDocIds_docIdIsUrl_noMetadata() + throws Exception { + // The setup for this test should match + // testGetDocIds_urlAndMetadataLister_extraMetadata with the + // exception of modeOfOperation. We want to verify that no + // metadata is returned despite valid metadata configuration when + // docIdIsUrl is true and modeOfOperation is not + // urlAndMetadataLister. + executeUpdate("create table data(url varchar(200), name varchar(20))"); + executeUpdate( + "insert into data(url, name) values('http://localhost/','John')"); + executeUpdate( + "create table metadata(url varchar(200), other varchar(200))"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost/', 'John 1')"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost/', 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("docId.isUrl", "true"); + // modeOfOperation must be valid and not urlAndMetadataLister for + // this test + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.uniqueKey", "url:string"); + configEntries.put("db.everyDocIdSql", "select * from data"); + configEntries.put("db.metadataColumns", "URL:col1, NAME:col2"); + configEntries.put("db.extraMetadataSql", + "select other from metadata where url = ?"); + configEntries.put("db.extraMetadataSqlParameters", "url"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + RecordingDocIdPusher pusher = new RecordingDocIdPusher(); + adaptor.getDocIds(pusher); + + assertEquals( + Arrays.asList(new Record.Builder(new DocId("http://localhost/")) + .build()), + pusher.getRecords()); + } + @Test public void testGetDocIds_docIdIsUrl() throws Exception { // Add time to show the records as modified. @@ -1686,6 +1727,7 @@ public void testGetDocIds_docIdIsUrl() throws Exception { pusher.getRecords()); } + @Test public void testGetDocIds_nullUniqueKey() throws Exception { executeUpdate("create table data(id integer, other varchar(20))"); @@ -2255,6 +2297,55 @@ public void testGetModifiedDocIds_urlAndMetadataLister_extraMetadata() pusher.getRecords()); } + @Test + public void testGetModifiedDocIds_docIdIsUrl_noMetadata() + throws Exception { + // The setup for this test should match + // testGetDocModifiedIds_urlAndMetadataLister_extraMetadata with + // the exception of modeOfOperation. We want to verify that no + // metadata is returned despite valid metadata configuration when + // docIdIsUrl is true and modeOfOperation is not + // urlAndMetadataLister. + // Add time to show the records as modified. + executeUpdate("create table data(url varchar(20)," + + " other varchar(20), ts timestamp)"); + executeUpdate("insert into data(url, other, ts) values ('http://localhost'," + + " 'hello world', " + nowPlus(1) + ")"); + executeUpdate( + "create table metadata(url varchar(200), other varchar(200))"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost', 'John 1')"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost', 'John 2')"); + + Map moreEntries = new HashMap(); + // modeOfOperation must be valid and not urlAndMetadataLister for + // this test + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("docId.isUrl", "true"); + moreEntries.put("db.uniqueKey", "url:string"); + moreEntries.put("db.updateSql", + "select url, other from data where ts >= ?"); + moreEntries.put("db.metadataColumns", "other"); + moreEntries.put("db.extraMetadataSql", + "select other as metadataother from metadata where url = ?"); + moreEntries.put("db.extraMetadataSqlParameters", "url"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select url, other from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where url = ?"); + + PollingIncrementalLister lister = getPollingIncrementalLister(moreEntries); + RecordingDocIdPusher pusher = new RecordingDocIdPusher(); + lister.getModifiedDocIds(pusher); + + assertEquals( + Arrays.asList( + new Record.Builder(new DocId("http://localhost")) + .setCrawlImmediately(true).build()), + pusher.getRecords()); + } + @Test public void testGetModifiedDocIds_disableStreaming() throws Exception { // Subtract time to show the records as unchanged. @@ -2825,7 +2916,7 @@ public void testExtraMetadata_docAndExtra() throws Exception { configEntries.put("db.singleDocContentSql", "select id, name from data where id = ?"); configEntries.put("db.modeOfOperation", "rowToText"); - configEntries.put("db.metadataColumns", "name"); + configEntries.put("db.includeAllColumnsAsMetadata", "true"); configEntries.put("db.extraMetadataSql", "select other from metadata where id = ?"); configEntries.put("db.extraMetadataSqlParameters", "id"); @@ -2837,7 +2928,8 @@ public void testExtraMetadata_docAndExtra() throws Exception { RecordingResponse response = new RecordingResponse(); adaptor.getDocContent(request, response); Metadata expected = new Metadata(); - expected.add("name", "Doe, John"); + expected.add("ID", "1001"); + expected.add("NAME", "Doe, John"); expected.add("OTHER", "John 1"); expected.add("OTHER", "John 2"); // metadata comes from both the document row and the extra table @@ -2875,6 +2967,37 @@ public void testExtraMetadata_onlyExtra() throws Exception { assertEquals(expected, response.getMetadata()); } + @Test + public void testExtraMetadata_differentParameters() throws Exception { + executeUpdate("create table data(id int, name varchar(200), metaId int)"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate( + "insert into data(id, name, metaId) values (1001, 'Doe, John', 42)"); + executeUpdate("insert into metadata(id, other) values (42, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (42, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int,metaId:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.singleDocContentSqlParameters", "id"); + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.extraMetadataSql", + "select other from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "metaId"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001/42")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + assertEquals(expected, response.getMetadata()); + } + @Test public void testExtraMetadata_extraMetadataColumns() throws Exception { executeUpdate("create table data(id int, name varchar(200))"); @@ -2908,7 +3031,7 @@ public void testExtraMetadata_extraMetadataColumns() throws Exception { } @Test - public void testExtraMetadata_extraMetadataColumns_Mapping() + public void testExtraMetadata_extraMetadataColumns_mapping() throws Exception { executeUpdate("create table data(id int, name varchar(200))"); executeUpdate("create table metadata(id int, other varchar(200))"); @@ -2967,6 +3090,64 @@ public void testExtraMetadata_invalidExtraMetadataColumns() throws Exception { DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); } + @Test + public void testExtraMetadata_invalidExtraMetadataSql() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + // Metadata table does not exist. When extraMetadataColumns isn't + // specified, the column names aren't verified in init, so this + // statement is prepared for the first time when adding extra + // metadata, triggering an exception to improve test coverage. + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + thrown.expect(IOException.class); + thrown.expectCause(isA(SQLException.class)); + adaptor.getDocContent(request, response); + } + + @Test + public void testExtraMetadata_missingExtraMetadataSqlParameters() + throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("ID", "1001"); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + assertEquals(expected, response.getMetadata()); + } + @Test public void testGetDocContentAcl() throws Exception { executeUpdate("create table data(id integer)"); From 6696f1903b7f92803bb05852ab2c065c06a253ef Mon Sep 17 00:00:00 2001 From: Gennis Emerson Date: Mon, 8 Jan 2018 11:40:55 -0800 Subject: [PATCH 4/4] Code review changes. --- .../adaptor/database/UniqueKey.java | 21 +++++++++++-------- .../adaptor/database/DatabaseAdaptorTest.java | 1 - 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/com/google/enterprise/adaptor/database/UniqueKey.java b/src/com/google/enterprise/adaptor/database/UniqueKey.java index cfe21f3..a17d0d6 100644 --- a/src/com/google/enterprise/adaptor/database/UniqueKey.java +++ b/src/com/google/enterprise/adaptor/database/UniqueKey.java @@ -117,15 +117,18 @@ String makeUniqueId(ResultSet rs) throws SQLException, URISyntaxException { private void setSqlValues(PreparedStatement st, String uniqueId, List sqlCols) throws SQLException { Map zip = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - // Parameters to SQL queries are taken from the unique id. When reading extraMetadata - // when docIdIsUrl is true, we're now passing the unique id to this method for the - // first time. Don't try to split the id; we know it must be a single unique string. - - // TODO(aptls): while this may be OK in that we don't want to try to split the id when - // we know it's a URL, the underlying use case of using value(s) from the unique id to - // retrieve the extra metadata might not want to have to use this URL as the metadata - // key. We want to change this to be able to retrieve the metadata key value(s) from - // the doc id or content result set instead of the key. + // Parameters to SQL queries are taken from the unique id. When + // reading extraMetadata when docIdIsUrl is true, we're now + // passing the unique id to this method for the first time. Don't + // try to split the id; we know it must be a single unique string. + + // TODO(aptls): while this may be OK in that we don't want to try + // to split the id when we know it's a URL, the underlying use + // case of using value(s) from the unique id to retrieve the extra + // metadata might not want to have to use this URL as the metadata + // key. We want to change this to be able to retrieve the metadata + // key value(s) from the doc id or content result set instead of + // the key. if (docIdIsUrl) { zip.put(docIdSqlCols.get(0), uniqueId); } else { diff --git a/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java b/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java index 5b82460..4eb64d9 100644 --- a/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java +++ b/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java @@ -1727,7 +1727,6 @@ public void testGetDocIds_docIdIsUrl() throws Exception { pusher.getRecords()); } - @Test public void testGetDocIds_nullUniqueKey() throws Exception { executeUpdate("create table data(id integer, other varchar(20))");