-
Notifications
You must be signed in to change notification settings - Fork 56
Optimize large OCL zip file import performance #119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -21,9 +21,21 @@ | |||||||||||||||||
|
|
||||||||||||||||||
| import java.util.ArrayList; | ||||||||||||||||||
| import java.util.HashMap; | ||||||||||||||||||
| import java.util.HashSet; | ||||||||||||||||||
| import java.util.List; | ||||||||||||||||||
| import java.util.Map; | ||||||||||||||||||
| import java.util.Set; | ||||||||||||||||||
|
|
||||||||||||||||||
| /** | ||||||||||||||||||
| * In-memory cache layer for the OCL import pipeline. A new instance is created per import run | ||||||||||||||||||
| * in Importer.processInput() and is garbage collected when the import completes. | ||||||||||||||||||
| * <p> | ||||||||||||||||||
| * Most entity caches (concepts, conceptMaps, etc.) are cleared on each flush/clear cycle via | ||||||||||||||||||
| * {@link #clearCache()}. The item URL caches ({@code itemsByUrl}, {@code checkedItemUrls}) grow | ||||||||||||||||||
| * monotonically for the lifetime of the import since they must persist across batches and phases. | ||||||||||||||||||
| * For typical imports (~15K items), this is a few MB. For very large imports (hundreds of thousands | ||||||||||||||||||
| * of items), memory usage should be monitored. | ||||||||||||||||||
|
Comment on lines
+33
to
+37
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not entirely convinced that this bunch of text is helpful as a Javadoc comment. It's not incorrect, it's just not necessarily helpful.
Suggested change
|
||||||||||||||||||
| */ | ||||||||||||||||||
| public class CacheService { | ||||||||||||||||||
|
|
||||||||||||||||||
| ConceptService conceptService; | ||||||||||||||||||
|
|
@@ -59,6 +71,11 @@ public CacheService(ConceptService conceptService, OclConceptService oclConceptS | |||||||||||||||||
| // Cache for ConceptReferenceTerms keyed by "sourceId:code" | ||||||||||||||||||
| Map<String, ConceptReferenceTerm> conceptReferenceTerms = new HashMap<>(); | ||||||||||||||||||
|
|
||||||||||||||||||
| // Cache for item URL lookups - persists across clearCache() calls since items are used for metadata only | ||||||||||||||||||
| private final Map<String, Item> itemsByUrl = new HashMap<>(); | ||||||||||||||||||
| private final Set<String> checkedItemUrls = new HashSet<>(); | ||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this could use an explanation and, more precisely, why it wouldn't be ok to just use |
||||||||||||||||||
| private boolean skipDbItemLookups = false; | ||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While it's explained on the setter (which is a weird place for it) and explanation of the variable here would seem useful. |
||||||||||||||||||
|
|
||||||||||||||||||
| /** | ||||||||||||||||||
| * Pre-loads all concept sources into the cache to avoid repeated database queries. | ||||||||||||||||||
| */ | ||||||||||||||||||
|
|
@@ -116,14 +133,51 @@ private void preloadConceptMapTypes() { | |||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| /** | ||||||||||||||||||
| * Gets the last successful item for a given URL by looking up in the database. | ||||||||||||||||||
| * This searches across all previous imports to find if this URL was previously imported. | ||||||||||||||||||
| * Gets the last successful item for a given URL. Results are cached to avoid | ||||||||||||||||||
| * repeated database queries for the same URL across batches and import phases. | ||||||||||||||||||
| * The cache persists across clearCache() calls since items are used for metadata only. | ||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I actually think that the comment here is less helpful. Fewer words are faster to read. |
||||||||||||||||||
| */ | ||||||||||||||||||
| public Item getLastSuccessfulItemByUrl(String url, ImportService importService) { | ||||||||||||||||||
| if (url == null) { | ||||||||||||||||||
| return null; | ||||||||||||||||||
| } | ||||||||||||||||||
| return importService.getLastSuccessfulItemByUrl(url, this); | ||||||||||||||||||
|
|
||||||||||||||||||
| if (checkedItemUrls.contains(url)) { | ||||||||||||||||||
| return itemsByUrl.get(url); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| if (skipDbItemLookups) { | ||||||||||||||||||
| return null; | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| Item item = importService.getLastSuccessfulItemByUrl(url, this); | ||||||||||||||||||
| checkedItemUrls.add(url); | ||||||||||||||||||
| if (item != null) { | ||||||||||||||||||
| itemsByUrl.put(url, item); | ||||||||||||||||||
| } | ||||||||||||||||||
| return item; | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| /** | ||||||||||||||||||
| * Caches an item by its URL for fast lookup during the import. | ||||||||||||||||||
| * Called after successfully saving a concept or mapping to make it | ||||||||||||||||||
| * available for subsequent lookups (e.g., mapping phase looking up concept items) | ||||||||||||||||||
| * without a database query. | ||||||||||||||||||
|
Comment on lines
+162
to
+165
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
| */ | ||||||||||||||||||
| public void cacheItem(Item item) { | ||||||||||||||||||
| if (item != null && item.getUrl() != null && item.getState() != ItemState.ERROR) { | ||||||||||||||||||
| itemsByUrl.put(item.getUrl(), item); | ||||||||||||||||||
| checkedItemUrls.add(item.getUrl()); | ||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| /** | ||||||||||||||||||
| * When set to true, skips database lookups in getLastSuccessfulItemByUrl() for URLs | ||||||||||||||||||
| * not already in the cache. Used for first-time imports where no previous items exist, | ||||||||||||||||||
| * eliminating thousands of DB queries that would all return null. | ||||||||||||||||||
|
Comment on lines
+175
to
+177
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
| */ | ||||||||||||||||||
| public void setSkipDbItemLookups(boolean skip) { | ||||||||||||||||||
| this.skipDbItemLookups = skip; | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| public void clearCache() { | ||||||||||||||||||
|
|
@@ -139,6 +193,12 @@ public void clearCache() { | |||||||||||||||||
| conceptMaps.clear(); | ||||||||||||||||||
| concepts.clear(); | ||||||||||||||||||
| conceptReferenceTerms.clear(); | ||||||||||||||||||
|
|
||||||||||||||||||
| // Note: itemsByUrl and checkedItemUrls are intentionally NOT cleared here. | ||||||||||||||||||
| // They must persist across flush/clear cycles so the mapping phase can look up | ||||||||||||||||||
| // concept items saved earlier without DB queries. For ~15,000 items this retains | ||||||||||||||||||
| // ~15K Item objects + String keys in memory, which is acceptable. The entire | ||||||||||||||||||
| // CacheService instance is GC'd when the import run completes. | ||||||||||||||||||
|
Comment on lines
+197
to
+201
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| public ConceptDatatype getConceptDatatypeByName(String name) { | ||||||||||||||||||
|
|
@@ -259,13 +319,13 @@ public ConceptMap getConceptMapByUuid(String uuid, ImportService importService) | |||||||||||||||||
| ConceptMap conceptMap = conceptMaps.get(uuid); | ||||||||||||||||||
| if (conceptMap != null) { | ||||||||||||||||||
| return conceptMap; | ||||||||||||||||||
| } else { | ||||||||||||||||||
| conceptMap = importService.getConceptMapByUuid(uuid); | ||||||||||||||||||
| if (conceptMap != null) { | ||||||||||||||||||
| conceptMaps.put(uuid, conceptMap); | ||||||||||||||||||
| } | ||||||||||||||||||
| return conceptMap; | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| conceptMap = importService.getConceptMapByUuid(uuid); | ||||||||||||||||||
| if (conceptMap != null) { | ||||||||||||||||||
| conceptMaps.put(uuid, conceptMap); | ||||||||||||||||||
| } | ||||||||||||||||||
| return conceptMap; | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| public Concept getConceptByUuid(String uuid) { | ||||||||||||||||||
|
|
@@ -276,26 +336,28 @@ public Concept getConceptByUuid(String uuid) { | |||||||||||||||||
| Concept concept = concepts.get(uuid); | ||||||||||||||||||
| if (concept != null) { | ||||||||||||||||||
| return concept; | ||||||||||||||||||
| } else { | ||||||||||||||||||
| concept = conceptService.getConceptByUuid(uuid); | ||||||||||||||||||
| if (concept != null) { | ||||||||||||||||||
| concepts.put(uuid, concept); | ||||||||||||||||||
| } | ||||||||||||||||||
| return concept; | ||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| concept = conceptService.getConceptByUuid(uuid); | ||||||||||||||||||
| if (concept != null) { | ||||||||||||||||||
| concepts.put(uuid, concept); | ||||||||||||||||||
| } | ||||||||||||||||||
| return concept; | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| public Concept getConceptWithSameAsMapping(String source, String code) { | ||||||||||||||||||
| if (source == null || code == null) { | ||||||||||||||||||
| return null; | ||||||||||||||||||
| } | ||||||||||||||||||
| String cacheKey = source + ":" + code; | ||||||||||||||||||
| Concept concept = concepts.get(cacheKey); | ||||||||||||||||||
| if (concept == null) { | ||||||||||||||||||
| concept = oclConceptService.getConceptWithSameAsMapping(code, source); | ||||||||||||||||||
| if (concept != null) { | ||||||||||||||||||
| concepts.put(cacheKey, concept); | ||||||||||||||||||
| } | ||||||||||||||||||
| if (concept != null) { | ||||||||||||||||||
| return concept; | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| concept = oclConceptService.getConceptWithSameAsMapping(code, source); | ||||||||||||||||||
| if (concept != null) { | ||||||||||||||||||
| concepts.put(cacheKey, concept); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| return concept; | ||||||||||||||||||
|
|
||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -30,6 +30,7 @@ | |||||||||||||
| import org.openmrs.module.openconceptlab.OpenConceptLabConstants; | ||||||||||||||
| import org.openmrs.module.openconceptlab.Subscription; | ||||||||||||||
| import org.openmrs.module.openconceptlab.Utils; | ||||||||||||||
| import org.openmrs.module.openconceptlab.ValidationType; | ||||||||||||||
| import org.openmrs.module.openconceptlab.client.OclClient; | ||||||||||||||
| import org.openmrs.module.openconceptlab.client.OclClient.OclResponse; | ||||||||||||||
| import org.openmrs.module.openconceptlab.client.OclConcept; | ||||||||||||||
|
|
@@ -54,7 +55,12 @@ public class Importer implements Runnable { | |||||||||||||
|
|
||||||||||||||
| private static final Logger log = LoggerFactory.getLogger(Importer.class); | ||||||||||||||
|
|
||||||||||||||
| public final static int BATCH_SIZE = 256; | ||||||||||||||
| // Number of items to process before flushing/clearing the Hibernate session. | ||||||||||||||
| // Higher values reduce flush/clear cycles but increase session memory usage | ||||||||||||||
| // (each concept carries names, descriptions, mappings, etc.). The original value | ||||||||||||||
| // was 256; 512 is a moderate increase that balances fewer cycles with memory | ||||||||||||||
| // safety across varied OpenMRS deployment environments. | ||||||||||||||
|
Comment on lines
+58
to
+62
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the kind of AI-generated comment I've been at war with. It's half-helpful with a bunch of additional notes that aren't terribly helpful:
Suggested change
Unless we're going to control this via a GP (which may actually be an OK idea), I don't think a lot of notes on what this was, etc. help. |
||||||||||||||
| public final static int BATCH_SIZE = 512; | ||||||||||||||
|
|
||||||||||||||
| private ImportService importService; | ||||||||||||||
|
|
||||||||||||||
|
|
@@ -320,9 +326,12 @@ private void processInput() throws IOException { | |||||||||||||
| return; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Look up subscription once for the entire import | ||||||||||||||
| Subscription subscription = importService.getSubscription(); | ||||||||||||||
|
|
||||||||||||||
| String baseUrl = ""; | ||||||||||||||
| if (importService.getSubscription() != null) { | ||||||||||||||
| baseUrl = importService.getSubscription().getUrl(); | ||||||||||||||
| if (subscription != null) { | ||||||||||||||
| baseUrl = subscription.getUrl(); | ||||||||||||||
| if (baseUrl != null) { | ||||||||||||||
| try { | ||||||||||||||
| URI uri = new URI(baseUrl); | ||||||||||||||
|
|
@@ -339,6 +348,21 @@ private void processInput() throws IOException { | |||||||||||||
|
|
||||||||||||||
| CacheService cacheService = new CacheService(conceptService, oclConceptService); | ||||||||||||||
|
|
||||||||||||||
| // Determine validation type once for the entire import to avoid per-concept getSubscription() calls. | ||||||||||||||
| ValidationType validationType = ValidationType.FULL; | ||||||||||||||
| if (subscription != null && subscription.getValidationType() != null) { | ||||||||||||||
| validationType = subscription.getValidationType(); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // If this is the first-ever import, skip DB lookups for previous items since none exist. | ||||||||||||||
| // We check <= 1 because the current in-progress import may already be visible in the | ||||||||||||||
| // query results (Hibernate auto-flushes before Criteria queries). getImportsInOrder() | ||||||||||||||
| // returns ALL imports (no status filtering), so size 0 or 1 both mean no prior import. | ||||||||||||||
| List<Import> previousImports = importService.getImportsInOrder(0, 2); | ||||||||||||||
| if (previousImports.size() <= 1) { | ||||||||||||||
| cacheService.setSkipDbItemLookups(true); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| List<Item> items = new ArrayList<>(BATCH_SIZE); | ||||||||||||||
| while (parser.nextToken() != JsonToken.END_ARRAY) { | ||||||||||||||
| OclConcept oclConcept = parser.readValueAs(OclConcept.class); | ||||||||||||||
|
|
@@ -347,8 +371,9 @@ private void processInput() throws IOException { | |||||||||||||
|
|
||||||||||||||
| Item item; | ||||||||||||||
| try { | ||||||||||||||
| item = saver.saveConcept(cacheService, anImport, oclConcept); | ||||||||||||||
| log.info("Imported concept {}", oclConcept); | ||||||||||||||
| item = saver.saveConcept(cacheService, anImport, oclConcept, validationType); | ||||||||||||||
| cacheService.cacheItem(item); | ||||||||||||||
| log.debug("Imported concept {}", oclConcept); | ||||||||||||||
| } catch (Throwable e) { | ||||||||||||||
| log.error("Failed to import concept {}", oclConcept, e); | ||||||||||||||
| Context.clearSession(); | ||||||||||||||
|
|
@@ -393,7 +418,8 @@ private void processInput() throws IOException { | |||||||||||||
| Item item; | ||||||||||||||
| try { | ||||||||||||||
| item = saver.saveMapping(cacheService, anImport, oclMapping); | ||||||||||||||
| log.info("Imported mapping {}", oclMapping); | ||||||||||||||
| cacheService.cacheItem(item); | ||||||||||||||
| log.debug("Imported mapping {}", oclMapping); | ||||||||||||||
| } catch (SavingException e) { | ||||||||||||||
| log.error("Failed to save mapping {}", oclMapping, e); | ||||||||||||||
| Context.clearSession(); | ||||||||||||||
|
|
||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -76,6 +76,15 @@ public void setImportService(ImportService importService) { | |||||
| * @throws ImportException | ||||||
| */ | ||||||
| public Item saveConcept(final CacheService cacheService, final Import anImport, final OclConcept oclConcept) throws ImportException { | ||||||
| return saveConcept(cacheService, anImport, oclConcept, null); | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Saves a concept with the specified validation type. | ||||||
| * When validationType is non-null, it is used directly instead of looking up the subscription each time. | ||||||
| * This avoids repeated getSubscription() calls for every concept in the import. | ||||||
|
Comment on lines
+84
to
+85
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be better here to have a note on what happens when |
||||||
| */ | ||||||
| public Item saveConcept(final CacheService cacheService, final Import anImport, final OclConcept oclConcept, ValidationType validationType) throws ImportException { | ||||||
| Import thisImport = anImport; | ||||||
|
|
||||||
| Item item = cacheService.getLastSuccessfulItemByUrl(oclConcept.getUrl(), importService); | ||||||
|
|
@@ -98,16 +107,18 @@ public Item saveConcept(final CacheService cacheService, final Import anImport, | |||||
| while (true) { | ||||||
| try { | ||||||
| try { | ||||||
| ValidationType validationType; | ||||||
| if (importService.getSubscription() != null) { | ||||||
| validationType = importService.getSubscription().getValidationType(); | ||||||
| } else { | ||||||
| validationType = ValidationType.FULL; | ||||||
| ValidationType effectiveValidationType = validationType; | ||||||
| if (effectiveValidationType == null) { | ||||||
| if (importService.getSubscription() != null) { | ||||||
| effectiveValidationType = importService.getSubscription().getValidationType(); | ||||||
| } else { | ||||||
| effectiveValidationType = ValidationType.FULL; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if (ValidationType.FULL.equals(validationType)) { | ||||||
| if (ValidationType.FULL.equals(effectiveValidationType)) { | ||||||
| conceptService.saveConcept(concept); | ||||||
| } else if (ValidationType.NONE.equals(validationType)) { | ||||||
| } else if (ValidationType.NONE.equals(effectiveValidationType)) { | ||||||
| importService.updateConceptWithoutValidation(concept); | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -440,7 +451,7 @@ public Item saveMapping(final CacheService cacheService, final Import update, fi | |||||
| ItemState state; | ||||||
|
|
||||||
| // Get any existing ConceptMap entry by uuid | ||||||
| ConceptMap conceptMap = importService.getConceptMapByUuid(oclMapping.getExternalId()); | ||||||
| ConceptMap conceptMap = cacheService.getConceptMapByUuid(oclMapping.getExternalId(), importService); | ||||||
|
|
||||||
| // If we find an existing Map by uuid, update it to match the passed in mapping | ||||||
| if (conceptMap != null) { | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.