From 2520253bde864ae8259a7c0493b4443c76ae8df9 Mon Sep 17 00:00:00 2001 From: Adam Rauch Date: Thu, 11 Sep 2025 09:30:09 -0700 Subject: [PATCH 1/6] Speed up cloning/migrating of loaded GO schemas by dropping indices and re-adding after transfer --- .../org/labkey/api/protein/go/GoLoader.java | 17 ++++++++++++++--- .../src/org/labkey/protein/ProteinModule.java | 4 +++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/protein/api-src/org/labkey/api/protein/go/GoLoader.java b/protein/api-src/org/labkey/api/protein/go/GoLoader.java index 9f6b95c8c7..cd39db1fe5 100644 --- a/protein/api-src/org/labkey/api/protein/go/GoLoader.java +++ b/protein/api-src/org/labkey/api/protein/go/GoLoader.java @@ -143,14 +143,25 @@ public void load() }); } - private void loadGoFromGz() throws SQLException, IOException, ServletException + public static void dropGoIndexes() { DbSchema schema = ProteinSchema.getSchema(); + new SqlExecutor(schema).execute(schema.getSqlDialect().execute(schema, "drop_go_indexes", "")); + } + + public static void createGoIndexes() + { + DbSchema schema = ProteinSchema.getSchema(); + new SqlExecutor(schema).execute(schema.getSqlDialect().execute(schema, "create_go_indexes", "")); + } + + private void loadGoFromGz() throws SQLException, IOException, ServletException + { Map map = getGoLoadMap(); long start = System.currentTimeMillis(); clearGoLoaded(); - new SqlExecutor(schema).execute(schema.getSqlDialect().execute(schema, "drop_go_indexes", "")); + dropGoIndexes(); logStatus("Starting to load GO annotation files"); logStatus(""); @@ -175,7 +186,7 @@ private void loadGoFromGz() throws SQLException, IOException, ServletException } } - new SqlExecutor(schema).execute(schema.getSqlDialect().execute(schema, "create_go_indexes", "")); + createGoIndexes(); long elapsed = System.currentTimeMillis() - start; logStatus("Successfully loaded all GO annotation files (" + DateUtil.formatDuration(elapsed) + ")"); diff --git a/protein/src/org/labkey/protein/ProteinModule.java b/protein/src/org/labkey/protein/ProteinModule.java index d17e618e72..aae792d1cc 100644 --- a/protein/src/org/labkey/protein/ProteinModule.java +++ b/protein/src/org/labkey/protein/ProteinModule.java @@ -36,6 +36,7 @@ import org.labkey.api.protein.annotation.CustomAnnotationSetManager; import org.labkey.api.protein.annotation.ProteinAnnotationPipelineProvider; import org.labkey.api.protein.fasta.FastaDbLoader; +import org.labkey.api.protein.go.GoLoader; import org.labkey.api.protein.query.CustomAnnotationSchema; import org.labkey.api.protein.query.ProteinUserSchema; import org.labkey.api.protein.search.MSSearchWebpart; @@ -135,13 +136,14 @@ public void doStartup(ModuleContext moduleContext) public void beforeSchema(DbSchema targetSchema) { new SqlExecutor(targetSchema).execute("ALTER TABLE prot.Organisms DROP CONSTRAINT FK_ProtOrganisms_ProtIdentifiers"); + GoLoader.dropGoIndexes(); } @Override public void afterSchema(DbSchema targetSchema) { new SqlExecutor(targetSchema).execute("ALTER TABLE prot.Organisms ADD CONSTRAINT FK_ProtOrganisms_ProtIdentifiers FOREIGN KEY (IdentId) REFERENCES prot.Identifiers (IdentId)"); - } + GoLoader.dropGoIndexes(); } }); } From 3b0b2d55083d579afb6764da27f913029f690db1 Mon Sep 17 00:00:00 2001 From: Adam Rauch Date: Fri, 12 Sep 2025 05:18:26 -0700 Subject: [PATCH 2/6] Refactor MigrationHandler. Add some (virtual) foreign keys and provide other guidance to help find container columns. --- protein/src/org/labkey/protein/ProteinModule.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/protein/src/org/labkey/protein/ProteinModule.java b/protein/src/org/labkey/protein/ProteinModule.java index aae792d1cc..1f66627598 100644 --- a/protein/src/org/labkey/protein/ProteinModule.java +++ b/protein/src/org/labkey/protein/ProteinModule.java @@ -22,7 +22,6 @@ import org.labkey.api.data.ContainerManager; import org.labkey.api.data.DatabaseMigrationService; import org.labkey.api.data.DatabaseMigrationService.DefaultMigrationHandler; -import org.labkey.api.data.DbSchema; import org.labkey.api.data.SqlExecutor; import org.labkey.api.data.TableSelector; import org.labkey.api.files.FileContentService; @@ -130,19 +129,19 @@ public void doStartup(ModuleContext moduleContext) } ProteinService.get().registerProteinSearchView(new ProteinSearchViewProvider()); - DatabaseMigrationService.get().registerHandler(ProteinSchema.getSchema(), new DefaultMigrationHandler() + DatabaseMigrationService.get().registerHandler(new DefaultMigrationHandler(ProteinSchema.getSchema()) { @Override - public void beforeSchema(DbSchema targetSchema) + public void beforeSchema() { - new SqlExecutor(targetSchema).execute("ALTER TABLE prot.Organisms DROP CONSTRAINT FK_ProtOrganisms_ProtIdentifiers"); + new SqlExecutor(getSchema()).execute("ALTER TABLE prot.Organisms DROP CONSTRAINT FK_ProtOrganisms_ProtIdentifiers"); GoLoader.dropGoIndexes(); } @Override - public void afterSchema(DbSchema targetSchema) + public void afterSchema() { - new SqlExecutor(targetSchema).execute("ALTER TABLE prot.Organisms ADD CONSTRAINT FK_ProtOrganisms_ProtIdentifiers FOREIGN KEY (IdentId) REFERENCES prot.Identifiers (IdentId)"); + new SqlExecutor(getSchema()).execute("ALTER TABLE prot.Organisms ADD CONSTRAINT FK_ProtOrganisms_ProtIdentifiers FOREIGN KEY (IdentId) REFERENCES prot.Identifiers (IdentId)"); GoLoader.dropGoIndexes(); } }); } From 7d2aeaf9d34403d28ad6a6f8ab3a30e85c00acd6 Mon Sep 17 00:00:00 2001 From: Adam Rauch Date: Fri, 12 Sep 2025 07:09:01 -0700 Subject: [PATCH 3/6] Identify container field key in more tables. Exclude some tables. --- protein/src/org/labkey/protein/ProteinModule.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/protein/src/org/labkey/protein/ProteinModule.java b/protein/src/org/labkey/protein/ProteinModule.java index 1f66627598..f010b5a4de 100644 --- a/protein/src/org/labkey/protein/ProteinModule.java +++ b/protein/src/org/labkey/protein/ProteinModule.java @@ -23,6 +23,7 @@ import org.labkey.api.data.DatabaseMigrationService; import org.labkey.api.data.DatabaseMigrationService.DefaultMigrationHandler; import org.labkey.api.data.SqlExecutor; +import org.labkey.api.data.TableInfo; import org.labkey.api.data.TableSelector; import org.labkey.api.files.FileContentService; import org.labkey.api.files.TableUpdaterFileListener; @@ -39,6 +40,7 @@ import org.labkey.api.protein.query.CustomAnnotationSchema; import org.labkey.api.protein.query.ProteinUserSchema; import org.labkey.api.protein.search.MSSearchWebpart; +import org.labkey.api.query.FieldKey; import org.labkey.api.usageMetrics.UsageMetricsService; import org.labkey.api.view.BaseWebPartFactory; import org.labkey.api.view.Portal; @@ -138,6 +140,16 @@ public void beforeSchema() GoLoader.dropGoIndexes(); } + @Override + public @Nullable FieldKey getContainerFieldKey(TableInfo sourceTable) + { + return switch (sourceTable.getName()) + { + case "AnnotationTypes", "AnnotInsertions", "FastaFiles", "FastaLoads", "GoGraphPath", "GoTerm", "GoTerm2Term", "GoTermDefinition", "GoTermSynonym", "Identifiers", "IdentTypes", "InfoSources", "SprotOrgMap" -> SITE_WIDE_TABLE; + default -> super.getContainerFieldKey(sourceTable); + }; + } + @Override public void afterSchema() { From 5562fa10f8fd580d361feb3ad47f018486268437 Mon Sep 17 00:00:00 2001 From: Adam Rauch Date: Sat, 13 Sep 2025 06:27:50 -0700 Subject: [PATCH 4/6] Find container field keys in nearly every table --- protein/src/org/labkey/protein/ProteinModule.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/protein/src/org/labkey/protein/ProteinModule.java b/protein/src/org/labkey/protein/ProteinModule.java index f010b5a4de..25c3d491e6 100644 --- a/protein/src/org/labkey/protein/ProteinModule.java +++ b/protein/src/org/labkey/protein/ProteinModule.java @@ -145,8 +145,8 @@ public void beforeSchema() { return switch (sourceTable.getName()) { - case "AnnotationTypes", "AnnotInsertions", "FastaFiles", "FastaLoads", "GoGraphPath", "GoTerm", "GoTerm2Term", "GoTermDefinition", "GoTermSynonym", "Identifiers", "IdentTypes", "InfoSources", "SprotOrgMap" -> SITE_WIDE_TABLE; - default -> super.getContainerFieldKey(sourceTable); + case "CustomAnnotation", "CustomAnnotationSet" -> super.getContainerFieldKey(sourceTable); + default -> SITE_WIDE_TABLE; }; } @@ -154,7 +154,8 @@ public void beforeSchema() public void afterSchema() { new SqlExecutor(getSchema()).execute("ALTER TABLE prot.Organisms ADD CONSTRAINT FK_ProtOrganisms_ProtIdentifiers FOREIGN KEY (IdentId) REFERENCES prot.Identifiers (IdentId)"); - GoLoader.dropGoIndexes(); } + GoLoader.dropGoIndexes(); + } }); } From 9a4ac54029652c3ccc85004589c2190fcdae70d0 Mon Sep 17 00:00:00 2001 From: Adam Rauch Date: Wed, 17 Sep 2025 04:38:33 -0700 Subject: [PATCH 5/6] Skip GO tables for now --- protein/src/org/labkey/protein/ProteinModule.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/protein/src/org/labkey/protein/ProteinModule.java b/protein/src/org/labkey/protein/ProteinModule.java index 25c3d491e6..6adb7d4520 100644 --- a/protein/src/org/labkey/protein/ProteinModule.java +++ b/protein/src/org/labkey/protein/ProteinModule.java @@ -140,6 +140,15 @@ public void beforeSchema() GoLoader.dropGoIndexes(); } + @Override + public List getTablesToCopy() + { + // Temporary: we've proven we can copy the GO tables, but they take a long time; skip them for now. TODO: Remove this override for production testing + return super.getTablesToCopy().stream() + .filter(tableInfo -> !tableInfo.getName().startsWith("Go")) + .toList(); + } + @Override public @Nullable FieldKey getContainerFieldKey(TableInfo sourceTable) { @@ -154,7 +163,7 @@ public void beforeSchema() public void afterSchema() { new SqlExecutor(getSchema()).execute("ALTER TABLE prot.Organisms ADD CONSTRAINT FK_ProtOrganisms_ProtIdentifiers FOREIGN KEY (IdentId) REFERENCES prot.Identifiers (IdentId)"); - GoLoader.dropGoIndexes(); + GoLoader.createGoIndexes(); } }); } From ab13b79514186e59ee50294a36b13366ba15980b Mon Sep 17 00:00:00 2001 From: Adam Rauch Date: Sat, 20 Sep 2025 10:11:55 -0700 Subject: [PATCH 6/6] Implement SkipSchemas property. Fix PropertyDomain and ProtocolApplication. --- protein/src/org/labkey/protein/ProteinModule.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/protein/src/org/labkey/protein/ProteinModule.java b/protein/src/org/labkey/protein/ProteinModule.java index 6adb7d4520..fd9d076f5c 100644 --- a/protein/src/org/labkey/protein/ProteinModule.java +++ b/protein/src/org/labkey/protein/ProteinModule.java @@ -140,15 +140,6 @@ public void beforeSchema() GoLoader.dropGoIndexes(); } - @Override - public List getTablesToCopy() - { - // Temporary: we've proven we can copy the GO tables, but they take a long time; skip them for now. TODO: Remove this override for production testing - return super.getTablesToCopy().stream() - .filter(tableInfo -> !tableInfo.getName().startsWith("Go")) - .toList(); - } - @Override public @Nullable FieldKey getContainerFieldKey(TableInfo sourceTable) {