diff --git a/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java b/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java index c0539cf..9f77cfe 100644 --- a/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java +++ b/src/com/google/enterprise/adaptor/database/DatabaseAdaptor.java @@ -128,6 +128,8 @@ private static boolean isNullOrEmptyString(String str) { private String everyDocIdSql; private String singleDocContentSql; private MetadataColumns metadataColumns; + private String extraMetadataSql; + private MetadataColumns extraMetadataColumns; private ResponseGenerator respGenerator; private String aclSql; private String aclPrincipalDelimiter; @@ -150,6 +152,9 @@ public void initConfig(Config config) { // when set to true, if "db.metadataColumns" is blank, it will use all // returned columns as metadata. config.addKey("db.includeAllColumnsAsMetadata", "false"); + config.addKey("db.extraMetadataSql", ""); + config.addKey("db.extraMetadataSqlParameters", ""); + config.addKey("db.extraMetadataColumns", ""); config.addKey("db.modeOfOperation", null); config.addKey("db.updateSql", ""); config.addKey("db.aclSql", ""); @@ -234,6 +239,27 @@ public void init(AdaptorContext context) throws Exception { } } + if (!cfg.getValue("db.extraMetadataSql").isEmpty()) { + extraMetadataSql = cfg.getValue("db.extraMetadataSql"); + log.config("extra metadata sql: " + extraMetadataSql); + String extraMetadataColumnsConfig = + cfg.getValue("db.extraMetadataColumns"); + if (extraMetadataColumnsConfig.isEmpty()) { + extraMetadataColumns = null; + log.config("extra metadata columns: Use all columns in ResultSet"); + } else { + extraMetadataColumns = new MetadataColumns(extraMetadataColumnsConfig); + log.config("extra metadata columns: " + extraMetadataColumns); + } + } else { + if (!cfg.getValue("db.extraMetadataColumns").isEmpty()) { + ignored.add("db.extraMetadataColumns"); + } + if (!cfg.getValue("db.extraMetadataSqlParameters").isEmpty()) { + ignored.add("db.extraMetadataSqlParameters"); + } + } + modeOfOperation = cfg.getValue("db.modeOfOperation"); log.config("mode of operation: " + modeOfOperation); @@ -258,6 +284,15 @@ public void init(AdaptorContext context) throws Exception { } else if (!metadataColumns.isEmpty()) { ignored.add("db.metadataColumns"); } + if (!cfg.getValue("db.extraMetadataSql").isEmpty()) { + ignored.add("db.extraMetadataSql"); + } + if (!cfg.getValue("db.extraMetadataColumns").isEmpty()) { + ignored.add("db.extraMetadataColumns"); + } + if (!cfg.getValue("db.extraMetadataSqlParameters").isEmpty()) { + ignored.add("db.extraMetadataSqlParameters"); + } } } else if (singleDocContentSql.isEmpty()) { throw new InvalidConfigurationException( @@ -311,7 +346,9 @@ public void init(AdaptorContext context) throws Exception { = new UniqueKey.Builder(cfg.getValue("db.uniqueKey")) .setDocIdIsUrl(docIdIsUrl) .setContentSqlColumns(cfg.getValue("db.singleDocContentSqlParameters")) - .setAclSqlColumns(cfg.getValue("db.aclSqlParameters")); + .setAclSqlColumns(cfg.getValue("db.aclSqlParameters")) + .setMetadataSqlColumns( + cfg.getValue("db.extraMetadataSqlParameters")); // Verify all column names. try (Connection conn = makeNewConnection()) { @@ -332,6 +369,10 @@ public void init(AdaptorContext context) throws Exception { "db.metadataColumns", metadataColumns.keySet()); } } + if (extraMetadataColumns != null) { + verifyColumnNames(conn, "db.extraMetadataSql", extraMetadataSql, + "db.extraMetadataColumns", extraMetadataColumns.keySet()); + } if (respGenerator instanceof ResponseGenerator.SingleColumnContent) { ResponseGenerator.SingleColumnContent content = (ResponseGenerator.SingleColumnContent) respGenerator; @@ -477,6 +518,9 @@ public void getDocIds(DocIdPusher pusher) throws IOException, builder.setDeleteFromIndex(true); } else if ("urlAndMetadataLister".equals(modeOfOperation)) { addMetadataToRecordBuilder(builder, rs); + if (extraMetadataSql != null) { + addExtraMetadataToRecordBuilder(builder, conn, id.getUniqueId()); + } } DocIdPusher.Record record = builder.build(); log.log(Level.FINEST, "doc id: {0}", id); @@ -501,15 +545,10 @@ private interface MetadataHandler { /* * Adds all specified metadata columns to the record or response being built. */ - private void addMetadata(MetadataHandler meta, ResultSet rs) - throws SQLException, IOException { + private void addMetadata(MetadataHandler meta, ResultSet rs, + MetadataColumns columns) throws SQLException, IOException { ResultSetMetaData rsMetaData = rs.getMetaData(); - synchronized (this) { - if (metadataColumns == null) { - metadataColumns = new MetadataColumns(rsMetaData); - } - } - for (Map.Entry entry : metadataColumns.entrySet()) { + for (Map.Entry entry : columns.entrySet()) { int index; try { index = rs.findColumn(entry.getKey()); @@ -633,25 +672,73 @@ private void addMetadata(MetadataHandler meta, ResultSet rs) @VisibleForTesting void addMetadataToRecordBuilder(final DocIdPusher.Record.Builder builder, ResultSet rs) throws SQLException, IOException { + synchronized (this) { + if (metadataColumns == null) { + metadataColumns = new MetadataColumns(rs.getMetaData()); + } + } addMetadata( new MetadataHandler() { @Override public void addMetadata(String k, String v) { builder.addMetadata(k, v); } }, - rs); + rs, metadataColumns); } @VisibleForTesting void addMetadataToResponse(final Response resp, ResultSet rs) throws SQLException, IOException { + synchronized (this) { + if (metadataColumns == null) { + metadataColumns = new MetadataColumns(rs.getMetaData()); + } + } addMetadata( new MetadataHandler() { @Override public void addMetadata(String k, String v) { resp.addMetadata(k, v); } }, - rs); + rs, metadataColumns); + } + + private void addExtraMetadataToRecordBuilder( + final DocIdPusher.Record.Builder builder, Connection conn, + String uniqueId) throws SQLException, IOException { + addExtraMetadata(conn, uniqueId, + new MetadataHandler() { + @Override public void addMetadata(String k, String v) { + builder.addMetadata(k, v); + } + }); + } + + private void addExtraMetadataToResponse(final Response resp, Connection conn, + String uniqueId) throws SQLException, IOException { + addExtraMetadata(conn, uniqueId, + new MetadataHandler() { + @Override public void addMetadata(String k, String v) { + resp.addMetadata(k, v); + } + }); + } + + private void addExtraMetadata(Connection conn, String uniqueId, + MetadataHandler handler) throws SQLException, IOException { + try (PreparedStatement stmt = prepareExtraMetadataStatement(conn, uniqueId); + ResultSet rs = stmt.executeQuery()) { + synchronized (this) { + if (extraMetadataColumns == null) { + extraMetadataColumns = new MetadataColumns(rs.getMetaData()); + } + } + while (rs.next()) { + addMetadata(handler, rs, extraMetadataColumns); + } + } catch (SQLException ex) { + throw new IOException(ex); + } } /** Gives the bytes of a document referenced with id. */ @@ -675,6 +762,9 @@ public void getDocContent(Request req, Response resp) throws IOException { } // Generate response metadata first. addMetadataToResponse(resp, rs); + if (extraMetadataSql != null) { + addExtraMetadataToResponse(resp, conn, id.getUniqueId()); + } // Generate Acl if aclSql is provided. if (aclSql != null) { resp.setAcl(getAcl(conn, id.getUniqueId())); @@ -838,6 +928,14 @@ private PreparedStatement getStreamFromDb(Connection conn, return st; } + private PreparedStatement prepareExtraMetadataStatement(Connection conn, + String uniqueId) throws SQLException { + PreparedStatement st = conn.prepareStatement(extraMetadataSql); + uniqueKey.setMetadataSqlValues(st, uniqueId); + log.log(Level.FINER, "about to get extra metadata: {0}", uniqueId); + return st; + } + /** * Mechanism that accepts stream of DocIdPusher.Record instances, buffers * them, and sends them when it has accumulated maximum allowed amount per @@ -921,7 +1019,7 @@ static ResponseGenerator loadResponseGenerator(Config config) { throw new InvalidConfigurationException(errmsg, ex); } } - + @VisibleForTesting static ResponseGenerator loadResponseGeneratorInternal(Method method, Map config) { @@ -1026,6 +1124,9 @@ public void getModifiedDocIds(DocIdPusher pusher) builder.setDeleteFromIndex(true); } else if ("urlAndMetadataLister".equals(modeOfOperation)) { addMetadataToRecordBuilder(builder, rs); + if (extraMetadataSql != null) { + addExtraMetadataToRecordBuilder(builder, conn, id.getUniqueId()); + } } DocIdPusher.Record record = builder.build(); log.log(Level.FINEST, "doc id: {0}", id); diff --git a/src/com/google/enterprise/adaptor/database/UniqueKey.java b/src/com/google/enterprise/adaptor/database/UniqueKey.java index 8488f86..a17d0d6 100644 --- a/src/com/google/enterprise/adaptor/database/UniqueKey.java +++ b/src/com/google/enterprise/adaptor/database/UniqueKey.java @@ -55,15 +55,17 @@ static enum ColumnType { private final Map types; // types of DocId columns private final List contentSqlCols; // columns for content query private final List aclSqlCols; // columns for acl query + private final List metadataSqlCols; // columns for metadata query private final boolean docIdIsUrl; private UniqueKey(List docIdSqlCols, Map types, List contentSqlCols, List aclSqlCols, - boolean docIdIsUrl) { + List metadataSqlCols, boolean docIdIsUrl) { this.docIdSqlCols = docIdSqlCols; this.types = types; this.contentSqlCols = contentSqlCols; this.aclSqlCols = aclSqlCols; + this.metadataSqlCols = metadataSqlCols; this.docIdIsUrl = docIdIsUrl; } @@ -114,18 +116,34 @@ String makeUniqueId(ResultSet rs) throws SQLException, URISyntaxException { private void setSqlValues(PreparedStatement st, String uniqueId, List sqlCols) throws SQLException { - // parse on / that isn't preceded by escape char _ - // (a / that is preceded by _ is part of column value) - String parts[] = uniqueId.split("(? zip = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - for (int i = 0; i < parts.length; i++) { - String columnValue = decodeSlashInData(parts[i]); - zip.put(docIdSqlCols.get(i), columnValue); + // Parameters to SQL queries are taken from the unique id. When + // reading extraMetadata when docIdIsUrl is true, we're now + // passing the unique id to this method for the first time. Don't + // try to split the id; we know it must be a single unique string. + + // TODO(aptls): while this may be OK in that we don't want to try + // to split the id when we know it's a URL, the underlying use + // case of using value(s) from the unique id to retrieve the extra + // metadata might not want to have to use this URL as the metadata + // key. We want to change this to be able to retrieve the metadata + // key value(s) from the doc id or content result set instead of + // the key. + if (docIdIsUrl) { + zip.put(docIdSqlCols.get(0), uniqueId); + } else { + // parse on / that isn't preceded by escape char _ + // (a / that is preceded by _ is part of column value) + String parts[] = uniqueId.split("(? types; // types of DocId columns private List contentSqlCols; // columns for content query private List aclSqlCols; // columns for acl query + private List metadataSqlCols; // columns for metadata query private boolean docIdIsUrl = false; /** @@ -282,6 +306,7 @@ static class Builder { docIdSqlCols = Collections.unmodifiableList(tmpNames); contentSqlCols = docIdSqlCols; aclSqlCols = docIdSqlCols; + metadataSqlCols = docIdSqlCols; } /** @@ -345,6 +370,19 @@ Builder setAclSqlColumns(String aclSqlColumns) return this; } + Builder setMetadataSqlColumns(String metadataSqlColumns) + throws InvalidConfigurationException { + if (metadataSqlColumns == null) { + throw new NullPointerException(); + } + if (!metadataSqlColumns.trim().isEmpty()) { + this.metadataSqlCols = + splitIntoNameList("db.extraMetadataSqlParameters", + metadataSqlColumns, types.keySet()); + } + return this; + } + private static List splitIntoNameList(String paramConfig, String cols, Set validNames) { List columnNames = new ArrayList(); @@ -434,6 +472,10 @@ List getAclSqlColumns() { return aclSqlCols; } + List getMetadataSqlColumns() { + return metadataSqlCols; + } + /** Return the Map of column types. */ @VisibleForTesting Map getColumnTypes() { @@ -464,7 +506,7 @@ UniqueKey build() throws InvalidConfigurationException { + " The key must be a single string column when docId.isUrl=true."); } return new UniqueKey(docIdSqlCols, types, contentSqlCols, aclSqlCols, - docIdIsUrl); + metadataSqlCols, docIdIsUrl); } } } diff --git a/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java b/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java index d297e84..4eb64d9 100644 --- a/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java +++ b/test/com/google/enterprise/adaptor/database/DatabaseAdaptorTest.java @@ -1124,6 +1124,32 @@ public void testInitLister_ignoredProperties_metadataColumns() containsString("[db.metadataColumns, db.singleDocContentSql]")); } + @Test + public void testInitLister_ignoredProperties_extraMetadataColumns() + throws Exception { + executeUpdate("create table data(url varchar(200), other varchar(20))"); + executeUpdate("create table metadata(url varchar(200), other varchar(20))"); + + Map moreEntries = new HashMap(); + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("docId.isUrl", "true"); + moreEntries.put("db.uniqueKey", "url:string"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select url from data"); + // Ignored properties in this mode. + moreEntries.put("db.extraMetadataSql", "select * from metadata"); + moreEntries.put("db.extraMetadataSqlParameters", "url"); + moreEntries.put("db.extraMetadataColumns", "other"); + + List messages = new ArrayList(); + captureLogMessages(DatabaseAdaptor.class, "will be ignored", messages); + getObjectUnderTest(moreEntries); + assertEquals(messages.toString(), 1, messages.size()); + assertThat(messages.get(0), + containsString("[db.extraMetadataColumns, db.extraMetadataSql," + + " db.extraMetadataSqlParameters]")); + } + @Test public void testInitLister_ignoredProperties_allColumns() throws Exception { executeUpdate("create table data(url varchar(200), other varchar(200))"); @@ -1145,6 +1171,28 @@ public void testInitLister_ignoredProperties_allColumns() throws Exception { containsString("[db.includeAllColumnsAsMetadata]")); } + @Test + public void testInitExtraMetadata_ignoredProperties() throws Exception { + executeUpdate("create table data(id int, other varchar(20))"); + + Map moreEntries = new HashMap(); + moreEntries.put("db.extraMetadataSql", ""); + moreEntries.put("db.extraMetadataSqlParameters", "id"); + moreEntries.put("db.extraMetadataColumns", "id"); + // Required for validation, but not specific to this test. + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("db.uniqueKey", "id:int"); + moreEntries.put("db.everyDocIdSql", "select id from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where id = ?"); + List messages = new ArrayList(); + captureLogMessages(DatabaseAdaptor.class, "will be ignored", messages); + getObjectUnderTest(moreEntries); + assertEquals(messages.toString(), 1, messages.size()); + assertThat(messages.get(0), containsString( + "[db.extraMetadataColumns, db.extraMetadataSqlParameters]")); + } + @Test public void testInitEmptyQuery_singleDocContentSql() throws Exception { executeUpdate("create table data(id int, other varchar(20))"); @@ -1404,6 +1452,45 @@ public void testInitVerifyColumnNames_metadataColumnAliasBad() getObjectUnderTest(moreEntries); } + @Test + public void testInitVerifyColumnNames_metadataSql_columnInUniqueKey() + throws Exception { + executeUpdate("create table data(id int, other varchar(20))"); + + Map moreEntries = new HashMap(); + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("db.uniqueKey", "id:int"); + moreEntries.put("db.extraMetadataSql", + "select other from data where id = ?"); + moreEntries.put("db.extraMetadataSqlParameters", "id"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select id from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where id = ?"); + getObjectUnderTest(moreEntries); + } + + @Test + public void testInitVerifyColumnNames_metadataSql_columnNotInUniqueKey() + throws Exception { + executeUpdate("create table data(id int, other varchar(20))"); + + Map moreEntries = new HashMap(); + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("db.uniqueKey", "id:int"); + moreEntries.put("db.extraMetadataSql", + "select other from data where id = ?"); + moreEntries.put("db.extraMetadataSqlParameters", "other"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select id from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where id = ?"); + thrown.expect(InvalidConfigurationException.class); + thrown.expectMessage("Unknown column 'other' from " + + "db.extraMetadataSqlParameters not found in db.uniqueKey: [id]"); + getObjectUnderTest(moreEntries); + } + @Test public void testInitVerifyColumnNames_modeOfOperation() throws Exception { executeUpdate("create table data(id int, other varchar(20))"); @@ -1527,6 +1614,85 @@ public void testGetDocIds_urlAndMetadataLister() throws Exception { pusher.getRecords()); } + @Test + public void testGetDocIds_urlAndMetadataLister_extraMetadata() + throws Exception { + executeUpdate("create table data(url varchar(200), name varchar(20))"); + executeUpdate( + "insert into data(url, name) values('http://localhost/','John')"); + executeUpdate( + "create table metadata(url varchar(200), other varchar(200))"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost/', 'John 1')"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost/', 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("docId.isUrl", "true"); + configEntries.put("db.modeOfOperation", "urlAndMetadataLister"); + configEntries.put("db.uniqueKey", "url:string"); + configEntries.put("db.everyDocIdSql", "select * from data"); + configEntries.put("db.metadataColumns", "URL:col1, NAME:col2"); + configEntries.put("db.extraMetadataSql", + "select other from metadata where url = ?"); + configEntries.put("db.extraMetadataSqlParameters", "url"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + RecordingDocIdPusher pusher = new RecordingDocIdPusher(); + adaptor.getDocIds(pusher); + + Metadata metadata = new Metadata(); + metadata.add("col1", "http://localhost/"); + metadata.add("col2", "John"); + metadata.add("OTHER", "John 1"); + metadata.add("OTHER", "John 2"); + assertEquals( + Arrays.asList(new Record.Builder(new DocId("http://localhost/")) + .setMetadata(metadata).build()), + pusher.getRecords()); + } + + @Test + public void testGetDocIds_docIdIsUrl_noMetadata() + throws Exception { + // The setup for this test should match + // testGetDocIds_urlAndMetadataLister_extraMetadata with the + // exception of modeOfOperation. We want to verify that no + // metadata is returned despite valid metadata configuration when + // docIdIsUrl is true and modeOfOperation is not + // urlAndMetadataLister. + executeUpdate("create table data(url varchar(200), name varchar(20))"); + executeUpdate( + "insert into data(url, name) values('http://localhost/','John')"); + executeUpdate( + "create table metadata(url varchar(200), other varchar(200))"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost/', 'John 1')"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost/', 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("docId.isUrl", "true"); + // modeOfOperation must be valid and not urlAndMetadataLister for + // this test + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.uniqueKey", "url:string"); + configEntries.put("db.everyDocIdSql", "select * from data"); + configEntries.put("db.metadataColumns", "URL:col1, NAME:col2"); + configEntries.put("db.extraMetadataSql", + "select other from metadata where url = ?"); + configEntries.put("db.extraMetadataSqlParameters", "url"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + RecordingDocIdPusher pusher = new RecordingDocIdPusher(); + adaptor.getDocIds(pusher); + + assertEquals( + Arrays.asList(new Record.Builder(new DocId("http://localhost/")) + .build()), + pusher.getRecords()); + } + @Test public void testGetDocIds_docIdIsUrl() throws Exception { // Add time to show the records as modified. @@ -2085,6 +2251,99 @@ public void testGetModifiedDocIds_urlAndMetadataLister() throws Exception { pusher.getRecords()); } + @Test + public void testGetModifiedDocIds_urlAndMetadataLister_extraMetadata() + throws Exception { + // Add time to show the records as modified. + executeUpdate("create table data(url varchar(20)," + + " other varchar(20), ts timestamp)"); + executeUpdate("insert into data(url, other, ts) values ('http://localhost'," + + " 'hello world', " + nowPlus(1) + ")"); + executeUpdate( + "create table metadata(url varchar(200), other varchar(200))"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost', 'John 1')"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost', 'John 2')"); + + Map moreEntries = new HashMap(); + moreEntries.put("db.modeOfOperation", "urlAndMetadataLister"); + moreEntries.put("docId.isUrl", "true"); + moreEntries.put("db.uniqueKey", "url:string"); + moreEntries.put("db.updateSql", + "select url, other from data where ts >= ?"); + moreEntries.put("db.metadataColumns", "other"); + moreEntries.put("db.extraMetadataSql", + "select other as metadataother from metadata where url = ?"); + moreEntries.put("db.extraMetadataSqlParameters", "url"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select url, other from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where url = ?"); + + PollingIncrementalLister lister = getPollingIncrementalLister(moreEntries); + RecordingDocIdPusher pusher = new RecordingDocIdPusher(); + lister.getModifiedDocIds(pusher); + + Metadata metadata = new Metadata(); + metadata.add("other", "hello world"); + metadata.add("METADATAOTHER", "John 1"); + metadata.add("METADATAOTHER", "John 2"); + assertEquals( + Arrays.asList( + new Record.Builder(new DocId("http://localhost")) + .setMetadata(metadata).setCrawlImmediately(true).build()), + pusher.getRecords()); + } + + @Test + public void testGetModifiedDocIds_docIdIsUrl_noMetadata() + throws Exception { + // The setup for this test should match + // testGetDocModifiedIds_urlAndMetadataLister_extraMetadata with + // the exception of modeOfOperation. We want to verify that no + // metadata is returned despite valid metadata configuration when + // docIdIsUrl is true and modeOfOperation is not + // urlAndMetadataLister. + // Add time to show the records as modified. + executeUpdate("create table data(url varchar(20)," + + " other varchar(20), ts timestamp)"); + executeUpdate("insert into data(url, other, ts) values ('http://localhost'," + + " 'hello world', " + nowPlus(1) + ")"); + executeUpdate( + "create table metadata(url varchar(200), other varchar(200))"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost', 'John 1')"); + executeUpdate("insert into metadata(url, other)" + + " values ('http://localhost', 'John 2')"); + + Map moreEntries = new HashMap(); + // modeOfOperation must be valid and not urlAndMetadataLister for + // this test + moreEntries.put("db.modeOfOperation", "rowToText"); + moreEntries.put("docId.isUrl", "true"); + moreEntries.put("db.uniqueKey", "url:string"); + moreEntries.put("db.updateSql", + "select url, other from data where ts >= ?"); + moreEntries.put("db.metadataColumns", "other"); + moreEntries.put("db.extraMetadataSql", + "select other as metadataother from metadata where url = ?"); + moreEntries.put("db.extraMetadataSqlParameters", "url"); + // Required for validation, but not specific to this test. + moreEntries.put("db.everyDocIdSql", "select url, other from data"); + moreEntries.put("db.singleDocContentSql", + "select * from data where url = ?"); + + PollingIncrementalLister lister = getPollingIncrementalLister(moreEntries); + RecordingDocIdPusher pusher = new RecordingDocIdPusher(); + lister.getModifiedDocIds(pusher); + + assertEquals( + Arrays.asList( + new Record.Builder(new DocId("http://localhost")) + .setCrawlImmediately(true).build()), + pusher.getRecords()); + } @Test public void testGetModifiedDocIds_disableStreaming() throws Exception { @@ -2643,6 +2902,251 @@ public void testMetadataColumns_columnAlias() throws Exception { assertEquals(expected, response.getMetadata()); } + @Test + public void testExtraMetadata_docAndExtra() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.includeAllColumnsAsMetadata", "true"); + configEntries.put("db.extraMetadataSql", + "select other from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("ID", "1001"); + expected.add("NAME", "Doe, John"); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + // metadata comes from both the document row and the extra table + assertEquals(expected, response.getMetadata()); + } + + @Test + public void testExtraMetadata_onlyExtra() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + // extraMetadataSql uses "select " to limit the + // columns returned as metadata + configEntries.put("db.extraMetadataSql", + "select other from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + assertEquals(expected, response.getMetadata()); + } + + @Test + public void testExtraMetadata_differentParameters() throws Exception { + executeUpdate("create table data(id int, name varchar(200), metaId int)"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate( + "insert into data(id, name, metaId) values (1001, 'Doe, John', 42)"); + executeUpdate("insert into metadata(id, other) values (42, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (42, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int,metaId:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.singleDocContentSqlParameters", "id"); + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.extraMetadataSql", + "select other from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "metaId"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001/42")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + assertEquals(expected, response.getMetadata()); + } + + @Test + public void testExtraMetadata_extraMetadataColumns() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + // extraMetadataSql uses "select *", so extraMetadataColumns + // is used to limit the columns returned as metadata + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + configEntries.put("db.extraMetadataColumns", "OTHER"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + assertEquals(expected, response.getMetadata()); + } + + @Test + public void testExtraMetadata_extraMetadataColumns_mapping() + throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + // extraMetadataSql uses "select *", so extraMetadataColumns + // is used to limit the columns returned as metadata + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + configEntries.put("db.extraMetadataColumns", "other:lowercasename"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("lowercasename", "John 1"); + expected.add("lowercasename", "John 2"); + assertEquals(expected, response.getMetadata()); + } + + @Test + public void testExtraMetadata_invalidExtraMetadataColumns() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + configEntries.put("db.extraMetadataColumns", "INVALID"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + thrown.expect(InvalidConfigurationException.class); + thrown.expectMessage("These columns from db.extraMetadataColumns [INVALID]" + + " not found in query db.extraMetadataSql:" + + " select * from metadata where id = ?"); + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + } + + @Test + public void testExtraMetadata_invalidExtraMetadataSql() throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + // Metadata table does not exist. When extraMetadataColumns isn't + // specified, the column names aren't verified in init, so this + // statement is prepared for the first time when adding extra + // metadata, triggering an exception to improve test coverage. + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + configEntries.put("db.extraMetadataSqlParameters", "id"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + thrown.expect(IOException.class); + thrown.expectCause(isA(SQLException.class)); + adaptor.getDocContent(request, response); + } + + @Test + public void testExtraMetadata_missingExtraMetadataSqlParameters() + throws Exception { + executeUpdate("create table data(id int, name varchar(200))"); + executeUpdate("create table metadata(id int, other varchar(200))"); + executeUpdate("insert into data(id, name) values (1001, 'Doe, John')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 1')"); + executeUpdate("insert into metadata(id, other) values (1001, 'John 2')"); + + Map configEntries = new HashMap(); + configEntries.put("db.uniqueKey", "id:int"); + configEntries.put("db.singleDocContentSql", + "select id, name from data where id = ?"); + configEntries.put("db.modeOfOperation", "rowToText"); + configEntries.put("db.extraMetadataSql", + "select * from metadata where id = ?"); + // Required for validation, but not specific to this test. + configEntries.put("db.everyDocIdSql", "select * from data"); + + DatabaseAdaptor adaptor = getObjectUnderTest(configEntries); + MockRequest request = new MockRequest(new DocId("1001")); + RecordingResponse response = new RecordingResponse(); + adaptor.getDocContent(request, response); + Metadata expected = new Metadata(); + expected.add("ID", "1001"); + expected.add("OTHER", "John 1"); + expected.add("OTHER", "John 2"); + assertEquals(expected, response.getMetadata()); + } + @Test public void testGetDocContentAcl() throws Exception { executeUpdate("create table data(id integer)"); diff --git a/test/com/google/enterprise/adaptor/database/UniqueKeyTest.java b/test/com/google/enterprise/adaptor/database/UniqueKeyTest.java index c4713ba..d6be3ce 100644 --- a/test/com/google/enterprise/adaptor/database/UniqueKeyTest.java +++ b/test/com/google/enterprise/adaptor/database/UniqueKeyTest.java @@ -94,6 +94,12 @@ public void testNullAclCols() { new UniqueKey.Builder("num:int").setAclSqlColumns(null); } + @Test + public void testNullMetadataCols() { + thrown.expect(NullPointerException.class); + new UniqueKey.Builder("num:int").setMetadataSqlColumns(null); + } + @Test public void testSingleInt() { UniqueKey.Builder builder = new UniqueKey.Builder("numnum:int"); @@ -187,6 +193,15 @@ public void testUnknownAclCol() { .setAclSqlColumns("numnum,IsStranger,strstr"); } + @Test + public void testUnknownMetadataCol() { + thrown.expect(InvalidConfigurationException.class); + thrown.expectMessage( + "Unknown column 'IsStranger' from db.extraMetadataSqlParameters"); + new UniqueKey.Builder("numnum:int,strstr:string") + .setMetadataSqlColumns("numnum,IsStranger,strstr"); + } + @Test public void testAddColumnTypes() { Map sqlTypes = new HashMap<>(); @@ -721,4 +736,24 @@ public void testSpacesBetweenAclSqlCols() { assertEquals(goldenNames, testBuilder.getAclSqlColumns()); } } + + @Test + public void testSpacesBetweenMetadataSqlCols() { + String ukDecl = "numnum:int,strstr:string"; + UniqueKey.Builder builder = new UniqueKey.Builder(ukDecl) + .setMetadataSqlColumns("numnum,numnum,strstr,numnum,strstr"); + List goldenNames = builder.getMetadataSqlColumns(); + + List testDecls = asList( + "numnum ,numnum,strstr,numnum,strstr", + "numnum, numnum,strstr,numnum,strstr", + "numnum , numnum,strstr,numnum,strstr", + "numnum , numnum,strstr,numnum,strstr", + "numnum , numnum , strstr , numnum,strstr"); + for (String colDecl : testDecls) { + UniqueKey.Builder testBuilder = new UniqueKey.Builder(ukDecl) + .setMetadataSqlColumns(colDecl); + assertEquals(goldenNames, testBuilder.getMetadataSqlColumns()); + } + } }