From 530dea064ab1fd82a64753c3474ce4c5878f3af3 Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Wed, 1 Apr 2026 10:12:27 +0400 Subject: [PATCH 1/9] DP-45831: Update internal links on mass.gov that are redirects --- conf/drupal/config/core.extension.yml | 1 + .../custom/mass_redirect_normalizer/README.md | 30 ++ .../mass_redirect_normalizer.info.yml | 9 + .../mass_redirect_normalizer.module | 20 + .../mass_redirect_normalizer.services.yml | 12 + .../MassRedirectNormalizerCommands.php | 137 +++++ .../src/RedirectLinkNormalizationManager.php | 101 ++++ .../src/RedirectLinkResolver.php | 208 ++++++++ .../RedirectLinkNormalizationTest.php | 485 ++++++++++++++++++ 9 files changed, 1003 insertions(+) create mode 100644 docroot/modules/custom/mass_redirect_normalizer/README.md create mode 100644 docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml create mode 100644 docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module create mode 100644 docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.services.yml create mode 100644 docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php create mode 100644 docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php create mode 100644 docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php create mode 100644 docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php diff --git a/conf/drupal/config/core.extension.yml b/conf/drupal/config/core.extension.yml index fedd4e098a..777f373864 100644 --- a/conf/drupal/config/core.extension.yml +++ b/conf/drupal/config/core.extension.yml @@ -145,6 +145,7 @@ module: mass_microsites: 0 mass_more_lists: 0 mass_nav: 0 + mass_redirect_normalizer: 0 mass_redirects: 0 mass_scheduled_transitions: 0 mass_schema_apply_action: 0 diff --git
a/docroot/modules/custom/mass_redirect_normalizer/README.md b/docroot/modules/custom/mass_redirect_normalizer/README.md new file mode 100644 index 0000000000..3d17b30d03 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/README.md @@ -0,0 +1,30 @@ +# Redirect Link Normalizer + +This module rewrites internal links that currently rely on redirects so they +point to the final destination path directly. For rich text links, the process +also adds `data-entity-*` attributes when the final target resolves to a node. + +## Manual execution + +- Dry run: + - `ddev drush --simulate mass-redirect-normalizer:normalize-links --limit=500` +- Execute: + - `ddev drush mass-redirect-normalizer:normalize-links --limit=5000` +- Optional filters: + - `--entity-type=node|paragraph|all` + - `--bundle=` + - `--show-unchanged` + +## Periodic execution + +For one-time/periodic bulk cleanup, run the Drush command above. + +For ongoing maintenance, this module also normalizes links during entity save +via `hook_entity_presave()` for nodes and paragraphs. This means new edits +automatically store final target paths instead of redirecting paths. + +## Post-run usage refresh + +For large backfills, regenerate entity usage to refresh orphan reports: + +- `ddev drush mass-content:usage-regenerate --batch-size=1000` diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml new file mode 100644 index 0000000000..362c28d93b --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml @@ -0,0 +1,9 @@ +name: Mass Redirect Normalizer +type: module +description: Normalize internal links that point at redirects to their final targets. 
+core_version_requirement: ^10 || ^11 +package: Custom +dependencies: + - mass_fields:mass_fields + - mass_content:mass_content + diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module new file mode 100644 index 0000000000..73926a9569 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module @@ -0,0 +1,20 @@ +normalizeEntity($entity, FALSE); +} + diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.services.yml b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.services.yml new file mode 100644 index 0000000000..3c60356b95 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.services.yml @@ -0,0 +1,12 @@ +services: + mass_redirect_normalizer.resolver: + class: Drupal\mass_redirect_normalizer\RedirectLinkResolver + arguments: ['@entity_type.manager', '@path_alias.manager', '@request_stack', '@router.request_context'] + + mass_redirect_normalizer.manager: + class: Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager + arguments: ['@mass_redirect_normalizer.resolver', '@datetime.time'] + + Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager: + alias: mass_redirect_normalizer.manager + diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php new file mode 100644 index 0000000000..919a9c0a88 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php @@ -0,0 +1,137 @@ + 0, 'entity-type' => 'all', 'bundle' => NULL, 'show-unchanged' => FALSE]): RowsOfFields { + $_ENV['MASS_FLAGGING_BYPASS'] = TRUE; + $entityTypes = $options['entity-type'] === 'all' ? 
['node', 'paragraph'] : [(string) $options['entity-type']]; + $limit = max(0, (int) $options['limit']); + $showUnchanged = !empty($options['show-unchanged']); + try { + $simulate = Drush::simulate(); + } + catch (\RuntimeException) { + // Allow direct invocation in PHPUnit without Drush bootstrap. + $simulate = FALSE; + } + $rows = []; + $processed = 0; + $changed = 0; + $skipped = 0; + + foreach ($entityTypes as $entityType) { + if (!in_array($entityType, ['node', 'paragraph'], TRUE)) { + $rows[] = [ + 'status' => 'unsupported', + 'entity_type' => $entityType, + 'entity_id' => 'N/A', + 'bundle' => 'N/A', + 'details' => 'Unsupported entity type', + ]; + continue; + } + + $idField = $entityType === 'node' ? 'nid' : 'id'; + $query = $this->entityTypeManager->getStorage($entityType)->getQuery() + ->accessCheck(FALSE) + ->sort($idField); + if (!empty($options['bundle'])) { + $query->condition('type', $options['bundle']); + } + if ($limit > 0) { + $query->range(0, $limit); + } + $ids = $query->execute(); + + foreach ($ids as $id) { + $entity = $this->entityTypeManager->getStorage($entityType)->load($id); + if (!$entity) { + continue; + } + + $result = $this->normalizerManager->normalizeEntity($entity, !$simulate); + $processed++; + if (!empty($result['changed'])) { + $changed++; + $rows[] = [ + 'status' => $simulate ? 'would_update' : 'updated', + 'entity_type' => $entityType, + 'entity_id' => $id, + 'bundle' => $entity->bundle(), + 'details' => 'Redirect-based links normalized', + ]; + } + elseif (!empty($result['skipped'])) { + $skipped++; + $rows[] = [ + 'status' => 'skipped', + 'entity_type' => $entityType, + 'entity_id' => $id, + 'bundle' => $entity->bundle(), + 'details' => 'Orphan paragraph skipped', + ]; + } + elseif ($showUnchanged) { + $rows[] = [ + 'status' => 'unchanged', + 'entity_type' => $entityType, + 'entity_id' => $id, + 'bundle' => $entity->bundle(), + 'details' => 'No redirect-based links found', + ]; + } + } + } + + $mode = $simulate ? 
'SIMULATION' : 'EXECUTION'; + $rows[] = [ + 'status' => 'summary', + 'entity_type' => 'all', + 'entity_id' => (string) $processed, + 'bundle' => 'N/A', + 'details' => "{$mode} complete. changed={$changed}; skipped={$skipped}", + ]; + + return new RowsOfFields($rows); + } + +} + diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php new file mode 100644 index 0000000000..50e573c746 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -0,0 +1,101 @@ + FALSE, 'skipped' => TRUE]; + } + + $changed = FALSE; + foreach ($entity->getFields() as $field) { + $fieldType = $field->getFieldDefinition()->getType(); + if (in_array($fieldType, ['text_long', 'text_with_summary', 'string_long'], TRUE)) { + foreach ($field as $item) { + if (!isset($item->value) || $item->value === NULL || $item->value === '') { + continue; + } + $processed = $this->resolver->normalizeRedirectLinksInText($item->value); + if ($processed['changed']) { + $item->value = $processed['text']; + $changed = TRUE; + } + } + } + elseif ($fieldType === 'link') { + foreach ($field as $item) { + if (empty($item->uri)) { + continue; + } + $processed = $this->resolver->normalizeRedirectLinkUri($item->uri); + if ($processed['changed']) { + $item->uri = $processed['uri']; + $changed = TRUE; + } + } + } + } + + if (!$changed || !$save) { + return ['changed' => $changed, 'skipped' => FALSE]; + } + + if (method_exists($entity, 'setNewRevision')) { + call_user_func([$entity, 'setNewRevision']); + } + if (method_exists($entity, 'setRevisionLogMessage')) { + call_user_func([$entity, 'setRevisionLogMessage'], 'Revision created to normalize redirected internal links.'); + } + if (method_exists($entity, 'setRevisionCreationTime')) { + call_user_func([$entity, 'setRevisionCreationTime'], $this->time->getRequestTime()); + } + 
$entity->save(); + + if ($entity->getEntityTypeId() === 'paragraph' && $node = Helper::getParentNode($entity)) { + if (method_exists($node, 'setNewRevision')) { + $node->setNewRevision(); + } + if (method_exists($node, 'setRevisionLogMessage')) { + $node->setRevisionLogMessage('Revision created to normalize redirected internal links in nested content.'); + } + if (method_exists($node, 'setRevisionCreationTime')) { + $node->setRevisionCreationTime($this->time->getRequestTime()); + } + $node->save(); + } + + return ['changed' => TRUE, 'skipped' => FALSE]; + } + +} + diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php new file mode 100644 index 0000000000..890eddb4e8 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php @@ -0,0 +1,208 @@ +query('//a[@href]') as $anchor) { + if (!$anchor instanceof \DOMElement) { + continue; + } + $href = (string) $anchor->getAttribute('href'); + $resolved = $this->resolveRedirectTarget($href); + if (!$resolved['changed']) { + continue; + } + + $anchor->setAttribute('href', $resolved['target_path']); + if (!empty($resolved['node'])) { + $anchor->setAttribute('data-entity-uuid', $resolved['node']->uuid()); + $anchor->setAttribute('data-entity-substitution', 'canonical'); + $anchor->setAttribute('data-entity-type', 'node'); + } + $changed = TRUE; + } + + return [ + 'changed' => $changed, + 'text' => Html::serialize($dom), + ]; + } + + /** + * Normalizes redirected internal links in link fields. + */ + public function normalizeRedirectLinkUri(string $uri): array { + $resolved = $this->resolveRedirectTarget($uri); + if (!$resolved['changed']) { + return [ + 'changed' => FALSE, + 'uri' => $uri, + ]; + } + + return [ + 'changed' => TRUE, + 'uri' => 'internal:' . $resolved['target_path'], + ]; + } + + /** + * Resolves an internal URL/path through redirect chains. 
+ */ + public function resolveRedirectTarget(string $url, int $maxDepth = 10): array { + $parsed = parse_url($url) ?: []; + $sourcePath = $this->extractLocalPath($url); + if (!$sourcePath) { + return ['changed' => FALSE]; + } + + $query = empty($parsed['query']) ? '' : '?' . $parsed['query']; + $fragment = empty($parsed['fragment']) ? '' : '#' . $parsed['fragment']; + + $current = ltrim($sourcePath, '/'); + $visited = []; + + for ($i = 0; $i < $maxDepth; $i++) { + if (isset($visited[$current])) { + break; + } + $visited[$current] = TRUE; + + $redirect = $this->loadRedirectBySourcePath($current); + if (!$redirect instanceof Redirect) { + break; + } + + $next = $this->extractLocalPath($redirect->getRedirectUrl()->toString()); + if (!$next) { + break; + } + $current = ltrim($next, '/'); + } + + $finalPath = '/' . ltrim($current, '/'); + $targetPath = $finalPath . $query . $fragment; + $sourceNormalized = '/' . ltrim($sourcePath, '/') . $query . $fragment; + if ($targetPath === $sourceNormalized) { + return ['changed' => FALSE]; + } + + $node = NULL; + $internalPath = $this->pathAliasManager->getPathByAlias($finalPath); + if (preg_match('/^\/node\/(\d+)$/', $internalPath, $matches)) { + $node = $this->entityTypeManager->getStorage('node')->load((int) $matches[1]); + } + + return [ + 'changed' => TRUE, + 'target_path' => $targetPath, + 'node' => $node, + ]; + } + + /** + * Extracts local path from URL/URI; returns NULL for non-local hosts. + */ + private function extractLocalPath(string $url): ?string { + if (str_starts_with($url, 'internal:')) { + $path = (string) parse_url(substr($url, strlen('internal:')), PHP_URL_PATH); + return '/' . ltrim($path, '/'); + } + + if (str_starts_with($url, '/')) { + $path = (string) parse_url($url, PHP_URL_PATH); + return '/' . ltrim($path, '/'); + } + + if (!UrlHelper::isExternal($url)) { + $path = (string) parse_url($url, PHP_URL_PATH); + return '/' . 
ltrim($path, '/'); + } + + $parts = parse_url($url); + $host = strtolower((string) ($parts['host'] ?? '')); + $knownHosts = ['mass.gov', 'www.mass.gov']; + if ($this->requestStack->getCurrentRequest()) { + $knownHosts[] = strtolower((string) $this->requestStack->getCurrentRequest()->getHost()); + } + $requestContextHost = strtolower((string) $this->requestContext->getHost()); + if ($requestContextHost !== '') { + $knownHosts[] = $requestContextHost; + } + if (!in_array($host, array_filter($knownHosts), TRUE)) { + return NULL; + } + + $path = $parts['path'] ?? '/'; + return '/' . ltrim((string) $path, '/'); + } + + /** + * Loads redirect by source path, tolerating leading slash differences. + */ + private function loadRedirectBySourcePath(string $sourcePath): ?Redirect { + $sourcePath = trim($sourcePath); + if ($sourcePath === '') { + return NULL; + } + + $candidates = [ + ltrim($sourcePath, '/'), + '/' . ltrim($sourcePath, '/'), + ]; + + $storage = $this->entityTypeManager->getStorage('redirect'); + foreach ($candidates as $candidate) { + $query = $storage->getQuery() + ->accessCheck(FALSE) + ->range(0, 1); + $group = $query->orConditionGroup() + ->condition('redirect_source.path', $candidate) + ->condition('redirect_source__path', $candidate); + $ids = $query->condition($group)->execute(); + if (!$ids) { + continue; + } + + $redirect = $storage->load((int) reset($ids)); + if ($redirect instanceof Redirect) { + return $redirect; + } + } + + return NULL; + } + +} + diff --git a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php new file mode 100644 index 0000000000..7a021b4ab2 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php @@ -0,0 +1,485 @@ +randomMachineName(); + $sourceFinal = 'chain-final-' . 
$this->randomMachineName(); + + $secondHop = Redirect::create(); + $secondHop->setRedirect('node/' . $target->id()); + $secondHop->setSource($sourceFinal); + $secondHop->setLanguage($target->language()->getId()); + $secondHop->setStatusCode(\Drupal::config('redirect.settings')->get('default_status_code')); + $secondHop->save(); + $this->cleanupEntities[] = $secondHop; + + $firstHop = Redirect::create(); + $firstHop->setRedirect('/' . $sourceFinal); + $firstHop->setSource($sourceStart); + $firstHop->setLanguage($target->language()->getId()); + $firstHop->setStatusCode(\Drupal::config('redirect.settings')->get('default_status_code')); + $firstHop->save(); + $this->cleanupEntities[] = $firstHop; + + return [$sourceStart, $sourceFinal]; + } + + /** + * Tests redirect chain resolution and rich-text rewriting. + */ + public function testRedirectChainNormalizationInText(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart, $sourceFinal] = $this->createRedirectChain($target); + + $redirectStorage = \Drupal::entityTypeManager()->getStorage('redirect'); + $matching = $redirectStorage->loadByProperties([ + 'redirect_source__path' => $sourceStart, + ]); + $this->assertNotEmpty($matching); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $resolved = $service->resolveRedirectTarget('/' . $sourceStart . '?foo=1#bar'); + $targetPath = $target->toUrl()->toString(); + $this->assertTrue($resolved['changed']); + $this->assertStringContainsString($targetPath, $resolved['target_path']); + $this->assertStringContainsString('?foo=1', $resolved['target_path']); + $this->assertStringContainsString('#bar', $resolved['target_path']); + $this->assertNotEmpty($resolved['node']); + $this->assertEquals($target->id(), $resolved['node']->id()); + + $html = '

Test link

'; + $normalized = $service->normalizeRedirectLinksInText($html); + $this->assertTrue($normalized['changed']); + $this->assertStringContainsString($targetPath, $normalized['text']); + $this->assertStringContainsString('data-entity-type="node"', $normalized['text']); + $this->assertStringContainsString('data-entity-substitution="canonical"', $normalized['text']); + $this->assertStringContainsString('data-entity-uuid="' . $target->uuid() . '"', $normalized['text']); + } + + /** + * Tests link-field URI normalization to final internal path. + */ + public function testNormalizeRedirectLinkUri(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $normalized = $service->normalizeRedirectLinkUri('internal:/' . $sourceStart . '?x=1#frag'); + $this->assertTrue($normalized['changed']); + $this->assertStringStartsWith('internal:/', $normalized['uri']); + $this->assertStringContainsString($target->toUrl()->toString(), $normalized['uri']); + $this->assertStringContainsString('?x=1', $normalized['uri']); + $this->assertStringContainsString('#frag', $normalized['uri']); + } + + /** + * Tests presave hook normalizes node rich-text links on save. + */ + public function testPresaveHookNormalizesNodeBodyOnSave(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $sourceNode = $this->createNode([ + 'type' => 'page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'body' => [ + 'value' => '

Normalize me

', + 'format' => 'full_html', + ], + ]); + + // Trigger presave normalization on node save. + $sourceNode->save(); + + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($sourceNode->id()); + $this->assertNotNull($reloaded); + /** @var \Drupal\node\NodeInterface $reloaded */ + $body = (string) $reloaded->get('body')->value; + $this->assertStringContainsString($target->toUrl()->toString(), $body); + $this->assertStringContainsString('data-entity-type="node"', $body); + } + + /** + * Tests looped redirects do not cause infinite processing. + */ + public function testRedirectLoopIsSafelyIgnored(): void { + $loopA = 'loop-a-' . $this->randomMachineName(); + $loopB = 'loop-b-' . $this->randomMachineName(); + + $a = Redirect::create(); + $a->setRedirect('/' . $loopB); + $a->setSource($loopA); + $a->setLanguage('en'); + $a->setStatusCode(\Drupal::config('redirect.settings')->get('default_status_code')); + $a->save(); + $this->cleanupEntities[] = $a; + + $b = Redirect::create(); + $b->setRedirect('/' . $loopA); + $b->setSource($loopB); + $b->setLanguage('en'); + $b->setStatusCode(\Drupal::config('redirect.settings')->get('default_status_code')); + $b->save(); + $this->cleanupEntities[] = $b; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $resolved = $service->resolveRedirectTarget('/' . $loopA . '?x=1#frag'); + + $this->assertFalse($resolved['changed']); + } + + /** + * Tests external URLs are ignored. + */ + public function testExternalUrlIsIgnored(): void { + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + + $resolved = $service->resolveRedirectTarget('https://example.com/somewhere'); + $this->assertFalse($resolved['changed']); + + $text = '

External

'; + $normalized = $service->normalizeRedirectLinksInText($text); + $this->assertFalse($normalized['changed']); + $this->assertStringContainsString('https://example.com/somewhere', $normalized['text']); + } + + /** + * Tests non-redirect local links remain unchanged. + */ + public function testNonRedirectLocalLinkRemainsUnchanged(): void { + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + + $resolved = $service->resolveRedirectTarget('/this-path-does-not-redirect'); + $this->assertFalse($resolved['changed']); + + $uriNormalized = $service->normalizeRedirectLinkUri('internal:/this-path-does-not-redirect'); + $this->assertFalse($uriNormalized['changed']); + $this->assertSame('internal:/this-path-does-not-redirect', $uriNormalized['uri']); + } + + /** + * Tests max-depth limit prevents over-following deep chains. + */ + public function testResolveRedirectTargetRespectsMaxDepth(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + + $p1 = 'depth-a-' . $this->randomMachineName(); + $p2 = 'depth-b-' . $this->randomMachineName(); + $p3 = 'depth-c-' . $this->randomMachineName(); + + $r1 = Redirect::create(); + $r1->setSource($p1); + $r1->setRedirect('/' . $p2); + $r1->setLanguage('en'); + $r1->setStatusCode(301); + $r1->save(); + $this->cleanupEntities[] = $r1; + + $r2 = Redirect::create(); + $r2->setSource($p2); + $r2->setRedirect('/' . $p3); + $r2->setLanguage('en'); + $r2->setStatusCode(301); + $r2->save(); + $this->cleanupEntities[] = $r2; + + $r3 = Redirect::create(); + $r3->setSource($p3); + $r3->setRedirect('node/' . 
$target->id()); + $r3->setLanguage('en'); + $r3->setStatusCode(301); + $r3->save(); + $this->cleanupEntities[] = $r3; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $limited = $service->resolveRedirectTarget('/' . $p1, 1); + $this->assertTrue($limited['changed']); + $this->assertSame('/' . $p2, $limited['target_path']); + + $full = $service->resolveRedirectTarget('/' . $p1, 10); + $this->assertTrue($full['changed']); + $this->assertStringContainsString($target->toUrl()->toString(), $full['target_path']); + } + + /** + * Tests redirecting to external target is ignored for rewriting. + */ + public function testRedirectToExternalTargetIsIgnored(): void { + $source = 'to-external-' . $this->randomMachineName(); + $redirect = Redirect::create(); + $redirect->setSource($source); + $redirect->setRedirect('https://example.com/outside'); + $redirect->setLanguage('en'); + $redirect->setStatusCode(301); + $redirect->save(); + $this->cleanupEntities[] = $redirect; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $resolved = $service->resolveRedirectTarget('/' . $source); + $this->assertFalse($resolved['changed']); + } + + /** + * Tests alias-like final target rewrites href but does not add node metadata. + */ + public function testAliasTargetWithoutNodeDoesNotAddEntityMetadata(): void { + $source = 'to-alias-' . $this->randomMachineName(); + $redirect = Redirect::create(); + $redirect->setSource($source); + $redirect->setRedirect('/some/non-node-alias'); + $redirect->setLanguage('en'); + $redirect->setStatusCode(301); + $redirect->save(); + $this->cleanupEntities[] = $redirect; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $text = '

Alias link

'; + $normalized = $service->normalizeRedirectLinksInText($text); + + $this->assertTrue($normalized['changed']); + $this->assertStringContainsString('/some/non-node-alias', $normalized['text']); + $this->assertStringNotContainsString('data-entity-type="node"', $normalized['text']); + $this->assertStringNotContainsString('data-entity-uuid=', $normalized['text']); + } + + /** + * Tests manager idempotency after first normalization. + */ + public function testManagerIsIdempotentAfterNormalization(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $node = $this->createNode([ + 'type' => 'page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'body' => [ + 'value' => '

No redirect yet

', + 'format' => 'full_html', + ], + ]); + $node->set('body', [ + 'value' => '

Run twice

', + 'format' => 'full_html', + ]); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ + $manager = \Drupal::service('mass_redirect_normalizer.manager'); + $first = $manager->normalizeEntity($node, TRUE); + $this->assertTrue($first['changed']); + + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $second = $manager->normalizeEntity($reloaded, TRUE); + $this->assertFalse($second['changed']); + } + + /** + * Tests command options constrain output by entity type and bundle. + */ + public function testCommandOptionsEntityTypeAndBundleFiltering(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $page = $this->createNode([ + 'type' => 'page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'body' => [ + 'value' => '

Node-only

', + 'format' => 'full_html', + ], + ]); + + $command = new MassRedirectNormalizerCommands( + \Drupal::entityTypeManager(), + \Drupal::service('mass_redirect_normalizer.manager') + ); + $rowsObj = $command->normalizeRedirectLinks([ + 'entity-type' => 'node', + 'bundle' => 'page', + 'limit' => 0, + 'show-unchanged' => TRUE, + ]); + $rows = method_exists($rowsObj, 'getArrayCopy') ? $rowsObj->getArrayCopy() : iterator_to_array($rowsObj); + + $this->assertNotEmpty($rows); + $nonSummaryRows = array_filter($rows, fn($row) => ($row['status'] ?? '') !== 'summary'); + $this->assertNotEmpty($nonSummaryRows); + foreach ($nonSummaryRows as $row) { + $this->assertSame('node', $row['entity_type']); + $this->assertSame('page', $row['bundle']); + } + } + + /** + * Tests absolute local URL link-field normalization. + */ + public function testNormalizeAbsoluteLocalUrlLinkField(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $normalized = $service->normalizeRedirectLinkUri('https://www.mass.gov/' . $sourceStart . '?q=1#x'); + + $this->assertTrue($normalized['changed']); + $this->assertStringStartsWith('internal:/', $normalized['uri']); + $this->assertStringContainsString($target->toUrl()->toString(), $normalized['uri']); + $this->assertStringContainsString('?q=1', $normalized['uri']); + $this->assertStringContainsString('#x', $normalized['uri']); + } + + /** + * Tests mixed multi-value link field normalization on one entity. 
+ */ + public function testManagerNormalizesOnlyRedirectingLinksInMultiValueField(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $node = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'field_social_links' => [ + ['uri' => 'internal:/no-redirect-here', 'title' => 'unchanged-local'], + ], + ]); + $node->set('field_social_links', [ + ['uri' => 'internal:/' . $sourceStart, 'title' => 'redirecting'], + ['uri' => 'internal:/no-redirect-here', 'title' => 'unchanged-local'], + ['uri' => 'https://example.com/external', 'title' => 'external'], + ]); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ + $manager = \Drupal::service('mass_redirect_normalizer.manager'); + $result = $manager->normalizeEntity($node, TRUE); + $this->assertTrue($result['changed']); + + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $links = $reloaded->get('field_social_links')->getValue(); + + $this->assertStringContainsString($target->toUrl()->toString(), $links[0]['uri']); + $this->assertSame('internal:/no-redirect-here', $links[1]['uri']); + $this->assertSame('https://example.com/external', $links[2]['uri']); + } + + /** + * Tests link item metadata (title/options) is preserved. 
+ */ + public function testLinkItemMetadataIsPreservedDuringNormalization(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $options = [ + 'attributes' => [ + 'class' => ['my-link-class'], + ], + ]; + $node = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'field_social_links' => [ + [ + 'uri' => 'internal:/no-redirect-yet', + 'title' => 'initial-title', + ], + ], + ]); + $node->set('field_social_links', [ + [ + 'uri' => 'internal:/' . $sourceStart, + 'title' => 'keep-title', + 'options' => $options, + ], + ]); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ + $manager = \Drupal::service('mass_redirect_normalizer.manager'); + $result = $manager->normalizeEntity($node, TRUE); + $this->assertTrue($result['changed']); + + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $item = $reloaded->get('field_social_links')->first(); + $this->assertNotNull($item); + $this->assertSame('keep-title', $item->title); + $this->assertSame('my-link-class', $item->options['attributes']['class'][0]); + $this->assertStringContainsString($target->toUrl()->toString(), $item->uri); + } + +} + From f93494becb8b60f86c8452b46fc8c6ff1e68e63b Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Thu, 2 Apr 2026 14:27:17 +0400 Subject: [PATCH 2/9] PHPCS --- .../mass_redirect_normalizer/mass_redirect_normalizer.module | 1 - .../src/Drush/Commands/MassRedirectNormalizerCommands.php | 1 - .../src/RedirectLinkNormalizationManager.php | 1 - .../custom/mass_redirect_normalizer/src/RedirectLinkResolver.php | 1 - .../tests/src/ExistingSite/RedirectLinkNormalizationTest.php | 1 - 5 files changed, 5 deletions(-) diff --git 
a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module index 73926a9569..416e791e7b 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module @@ -17,4 +17,3 @@ function mass_redirect_normalizer_entity_presave(EntityInterface $entity) { // In presave we mutate field values only; parent entity handles persistence. $manager->normalizeEntity($entity, FALSE); } - diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php index 919a9c0a88..c894b69e14 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php @@ -134,4 +134,3 @@ public function normalizeRedirectLinks($options = ['limit' => 0, 'entity-type' = } } - diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php index 50e573c746..eec535e759 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -98,4 +98,3 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU } } - diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php index 890eddb4e8..b0cceb737b 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php +++ 
b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php @@ -205,4 +205,3 @@ private function loadRedirectBySourcePath(string $sourcePath): ?Redirect { } } - diff --git a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php index 7a021b4ab2..f5e6618975 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php +++ b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php @@ -482,4 +482,3 @@ public function testLinkItemMetadataIsPreservedDuringNormalization(): void { } } - From 338dd0aa401a313eaf91523cfba1041120404aa4 Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Thu, 2 Apr 2026 14:28:17 +0400 Subject: [PATCH 3/9] Changelog --- changelogs/DP-45831.yml | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 changelogs/DP-45831.yml diff --git a/changelogs/DP-45831.yml b/changelogs/DP-45831.yml new file mode 100644 index 0000000000..ce2212ad7a --- /dev/null +++ b/changelogs/DP-45831.yml @@ -0,0 +1,41 @@ +# +# Write your changelog entry here. Every pull request must have a changelog yml file. +# +# Change types: +# ############################################################################# +# You can use one of the following types: +# - Added: For new features. +# - Changed: For changes to existing functionality. +# - Deprecated: For soon-to-be removed features. +# - Removed: For removed features. +# - Fixed: For any bug fixes. +# - Security: In case of vulnerabilities. +# +# Format +# ############################################################################# +# The format is crucial. Please follow the examples below. 
For reference, the requirements are: +# - All 3 parts are required and you must include "Type", "description" and "issue". +# - "Type" must be left aligned and followed by a colon. +# - "description" must be indented with 2 spaces followed by a colon +# - "issue" must be indented with 4 spaces followed by a colon. +# - "issue" is for the Jira ticket number only e.g. DP-1234 +# - No extra spaces, indents, or blank lines are allowed. +# +# Example: +# ############################################################################# +# Fixed: +# - description: Fixes scrolling on edit pages in Safari. +# issue: DP-13314 +# +# You may add more than 1 description & issue for each type using the following format: +# Changed: +# - description: Automating the release branch. +# issue: DP-10166 +# - description: Second change item that needs a description. +# issue: DP-19875 +# - description: Third change item that needs a description along with an issue. +# issue: DP-19843 +# +Changed: + - description: Update internal links on mass.gov that are redirects. 
+ issue: DP-45831 From 92c782354e03fec88162d122e0adc28dc760934e Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Thu, 2 Apr 2026 14:33:08 +0400 Subject: [PATCH 4/9] Change --- .../src/RedirectLinkNormalizationManager.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php index eec535e759..b903e4554a 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -4,6 +4,8 @@ use Drupal\Component\Datetime\TimeInterface; use Drupal\Core\Entity\ContentEntityInterface; +use Drupal\Core\Entity\RevisionLogInterface; +use Drupal\Core\Entity\RevisionableInterface; use Drupal\mayflower\Helper; use Drupal\paragraphs\Entity\Paragraph; @@ -70,14 +72,12 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU return ['changed' => $changed, 'skipped' => FALSE]; } - if (method_exists($entity, 'setNewRevision')) { - call_user_func([$entity, 'setNewRevision']); + if ($entity instanceof RevisionableInterface) { + $entity->setNewRevision(); } - if (method_exists($entity, 'setRevisionLogMessage')) { - call_user_func([$entity, 'setRevisionLogMessage'], 'Revision created to normalize redirected internal links.'); - } - if (method_exists($entity, 'setRevisionCreationTime')) { - call_user_func([$entity, 'setRevisionCreationTime'], $this->time->getRequestTime()); + if ($entity instanceof RevisionLogInterface) { + $entity->setRevisionLogMessage('Revision created to normalize redirected internal links.'); + $entity->setRevisionCreationTime($this->time->getRequestTime()); } $entity->save(); From f94bb7744d661f2cf2f23b122323d42a7f1bc22c Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Thu, 2 Apr 2026 14:36:03 
+0400 Subject: [PATCH 5/9] Change --- .../src/RedirectLinkNormalizationManager.php | 85 ++++++++++++------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php index b903e4554a..703bf51461 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -13,6 +13,8 @@ * Normalizes redirected internal links on content entities. */ class RedirectLinkNormalizationManager { + private const REVISION_MESSAGE = 'Revision created to normalize redirected internal links.'; + private const NESTED_REVISION_MESSAGE = 'Revision created to normalize redirected internal links in nested content.'; /** * Constructs the manager. @@ -44,26 +46,12 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU $fieldType = $field->getFieldDefinition()->getType(); if (in_array($fieldType, ['text_long', 'text_with_summary', 'string_long'], TRUE)) { foreach ($field as $item) { - if (!isset($item->value) || $item->value === NULL || $item->value === '') { - continue; - } - $processed = $this->resolver->normalizeRedirectLinksInText($item->value); - if ($processed['changed']) { - $item->value = $processed['text']; - $changed = TRUE; - } + $changed = $this->normalizeTextItem($item, $changed); } } elseif ($fieldType === 'link') { foreach ($field as $item) { - if (empty($item->uri)) { - continue; - } - $processed = $this->resolver->normalizeRedirectLinkUri($item->uri); - if ($processed['changed']) { - $item->uri = $processed['uri']; - $changed = TRUE; - } + $changed = $this->normalizeLinkItem($item, $changed); } } } @@ -72,29 +60,62 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU return ['changed' => $changed, 'skipped' 
=> FALSE]; } - if ($entity instanceof RevisionableInterface) { - $entity->setNewRevision(); - } - if ($entity instanceof RevisionLogInterface) { - $entity->setRevisionLogMessage('Revision created to normalize redirected internal links.'); - $entity->setRevisionCreationTime($this->time->getRequestTime()); - } + $this->prepareRevision($entity, self::REVISION_MESSAGE); $entity->save(); if ($entity->getEntityTypeId() === 'paragraph' && $node = Helper::getParentNode($entity)) { - if (method_exists($node, 'setNewRevision')) { - $node->setNewRevision(); - } - if (method_exists($node, 'setRevisionLogMessage')) { - $node->setRevisionLogMessage('Revision created to normalize redirected internal links in nested content.'); - } - if (method_exists($node, 'setRevisionCreationTime')) { - $node->setRevisionCreationTime($this->time->getRequestTime()); - } + $this->prepareRevision($node, self::NESTED_REVISION_MESSAGE); $node->save(); } return ['changed' => TRUE, 'skipped' => FALSE]; } + /** + * Normalize a text item value and return updated changed flag. + */ + private function normalizeTextItem(object $item, bool $changed): bool { + if (!isset($item->value) || $item->value === NULL || $item->value === '') { + return $changed; + } + + $processed = $this->resolver->normalizeRedirectLinksInText($item->value); + if ($processed['changed']) { + $item->value = $processed['text']; + return TRUE; + } + + return $changed; + } + + /** + * Normalize a link item URI and return updated changed flag. + */ + private function normalizeLinkItem(object $item, bool $changed): bool { + if (empty($item->uri)) { + return $changed; + } + + $processed = $this->resolver->normalizeRedirectLinkUri($item->uri); + if ($processed['changed']) { + $item->uri = $processed['uri']; + return TRUE; + } + + return $changed; + } + + /** + * Configure revision metadata when supported by entity type. 
+ */ + private function prepareRevision(ContentEntityInterface $entity, string $message): void { + if ($entity instanceof RevisionableInterface) { + $entity->setNewRevision(); + } + if ($entity instanceof RevisionLogInterface) { + $entity->setRevisionLogMessage($message); + $entity->setRevisionCreationTime($this->time->getRequestTime()); + } + } + } From ae8400edc34ae7ecf79ea37ea0931151bfdd58b7 Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Mon, 6 Apr 2026 11:49:35 +0400 Subject: [PATCH 6/9] Change --- .../custom/mass_redirect_normalizer/README.md | 136 +++++++-- .../mass_redirect_normalizer.info.yml | 1 + .../mass_redirect_normalizer.module | 1 - .../MassRedirectNormalizerCommands.php | 262 ++++++++++++++---- .../src/RedirectLinkNormalizationManager.php | 147 ++++++---- .../src/RedirectLinkResolver.php | 13 +- .../RedirectLinkNormalizationTest.php | 38 ++- 7 files changed, 458 insertions(+), 140 deletions(-) diff --git a/docroot/modules/custom/mass_redirect_normalizer/README.md b/docroot/modules/custom/mass_redirect_normalizer/README.md index 3d17b30d03..fe9a073b08 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/README.md +++ b/docroot/modules/custom/mass_redirect_normalizer/README.md @@ -1,30 +1,126 @@ # Redirect Link Normalizer -This module rewrites internal links that currently rely on redirects so they -point to the final destination path directly. For rich text links, the process -also adds `data-entity-*` attributes when the final target resolves to a node. +This module rewrites internal links that still point at **redirect source paths** so they use the **final path** instead. For rich text, when the final target is a node, it also adds `data-entity-*` attributes. 
-## Manual execution +The same logic runs in two places: -- Dry run: - - `ddev drush --simulate mass-redirect-normalizer:normalize-links --limit=500` -- Execute: - - `ddev drush mass-redirect-normalizer:normalize-links --limit=5000` -- Optional filters: - - `--entity-type=node|paragraph|all` - - `--bundle=` - - `--show-unchanged` +- **Bulk Drush command** — scan many entities and fix stored values. +- **`hook_entity_presave()`** — when an editor saves a node or paragraph, links are normalized on that save. -## Periodic execution +--- -For one-time/periodic bulk cleanup, run the Drush command above. +## What gets scanned -For ongoing maintenance, this module also normalizes links during entity save -via `hook_entity_presave()` for nodes and paragraphs. This means new edits -automatically store final target paths instead of redirecting paths. +For each **node** or **paragraph**, the code looks at: -## Post-run usage refresh +- Text fields: `text_long`, `text_with_summary`, `string_long` (HTML `href` values inside the markup). +- **Link** fields (`link` type): the stored URI. + +It does **not** change random text; it only rewrites values the resolver treats as redirect-based internal links (see integration tests for examples). + +## More about code + +- `RedirectLinkResolver`: + - Only link logic. + - It finds the final path and rewrites one text value or one link value. + - It does **not** save entities. +- `RedirectLinkNormalizationManager`: + - Entity workflow logic. + - It loops fields on node/paragraph, calls the resolver, handles dry-run, and saves revisions when needed. + +This split keeps code easier to test and easier to maintain. + +--- + +## Drush command + +| Item | Value | +|------|--------| +| Command | `mass-redirect-normalizer:normalize-links` | +| Alias | `mnrl` | + +### Options + +| Option | Meaning | +|--------|---------| +| `--simulate` | Dry run: **no** database writes. Same idea as global `ddev drush --simulate ...`. 
| +| `--limit=N` | Max entities **per entity type** to load from the query. **`0` = no limit.** When `--entity-type=all`, you get up to **N nodes** and up to **N paragraphs** (two separate caps). | +| `--entity-type=node\|paragraph\|all` | Default **`all`** (nodes and paragraphs). | +| `--bundle=...` | Only that bundle (node type or paragraph type machine name). Still checked after load. | +| `--entity-ids=1,2,3` | Only these IDs. **Requires** `--entity-type=node` or `paragraph` (**not** `all`). Ignores `--limit`. | + +### Default table columns + +| Column | Notes | +|--------|--------| +| Status | `would_update` (simulate) or `updated` (real run). | +| Entity type | `node` or `paragraph`. | +| Entity ID | Entity id. | +| Parent node ID | For **paragraphs**, the host node id from `Helper::getParentNode()`. For nodes, `-`. | +| Bundle | Bundle / type machine name. | +| URL before / URL after | This is just the link value, not full HTML. For link fields, it shows the stored path/URL. For text fields, it shows only links that changed (`href`). If many links changed in one field, they are joined with `; `. If the value is too long, CLI shortens it. | + +### What the command skips + +- **Orphan paragraphs** — paragraphs that are not attached to real host content (`Helper::isParagraphOrphan()`). They are **not** processed and **do not** appear as rows. +- Entities with **no** redirect-based links to fix produce **no** rows (empty table is normal). + +### Simulate, then run, then verify (manual QA) + +1. **Preview:** + `ddev drush mass-redirect-normalizer:normalize-links --simulate --limit=100` +2. **Apply:** + `ddev drush mass-redirect-normalizer:normalize-links --limit=100` +3. **Re-check:** run **simulate** again with the same filters. Items that were fixed should **not** show `would_update` anymore (unless something else changed them back). 
+ +For a narrow retest after you know specific IDs: -For large backfills, regenerate entity usage to refresh orphan reports: +`ddev drush mass-redirect-normalizer:normalize-links --simulate --entity-type=paragraph --entity-ids=123,456` + +### Important detail about saved content + +On **first save**, `hook_entity_presave()` may already rewrite links in the stored field values. So if you create test content in the UI and then expect the bulk command to “see” the old redirect URL in the database, it might already be normalized. The automated tests handle that case where needed. + +--- + +## Automated tests + +Existing-site integration tests live here: + +`docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php` + +Run tests: + +```bash +ddev exec ./vendor/bin/phpunit docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php +``` + +### What is covered + +- Redirect chain resolution (including query and fragment support). +- Rich-text rewriting (`href`) and node metadata attributes (`data-entity-*`). +- Link field URI normalization (`internal:/...` and absolute local mass.gov URLs). +- Redirect loops and max-depth behavior (no infinite follow, expected stop point). +- External URL behavior (ignored; no rewrite). +- Alias-like non-node targets (rewrite link, but do not add node metadata). +- Presave normalization path for nodes (`hook_entity_presave()` behavior). +- Manager behavior: + - Running it twice gives the same result (first run fixes links, second run has nothing new to fix). + - Multi-value link field handling (only redirecting values change). + - Link item metadata preservation (`title`, `options`). +- Drush command behavior: + - Entity type and bundle filters. + - Targeted runs with `--entity-ids`. + - Simulate mode row output (`would_update`) and URL before/after columns. 
+ +--- + +## Periodic / bulk cleanup + +Use the Drush command above for one-off or scheduled bulk runs. + +--- + +## Post-run usage refresh -- `ddev drush mass-content:usage-regenerate --batch-size=1000` +For large backfills, regenerate entity usage so usage reports stay accurate. diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml index 362c28d93b..77e87895cb 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml @@ -6,4 +6,5 @@ package: Custom dependencies: - mass_fields:mass_fields - mass_content:mass_content + - mayflower:mayflower diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module index 416e791e7b..85bc4f9f37 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module @@ -14,6 +14,5 @@ function mass_redirect_normalizer_entity_presave(EntityInterface $entity) { /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ $manager = \Drupal::service('mass_redirect_normalizer.manager'); - // In presave we mutate field values only; parent entity handles persistence. 
$manager->normalizeEntity($entity, FALSE); } diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php index c894b69e14..e71e87bcef 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php @@ -3,14 +3,18 @@ namespace Drupal\mass_redirect_normalizer\Drush\Commands; use Consolidation\OutputFormatters\StructuredData\RowsOfFields; +use Drupal\Component\Utility\Html; +use Drupal\Component\Utility\Unicode; use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager; +use Drupal\mayflower\Helper; +use Drupal\paragraphs\Entity\Paragraph; use Drush\Commands\AutowireTrait; use Drush\Commands\DrushCommands; use Drush\Drush; /** - * Drush commands for redirect link normalization. + * Drush command for redirect link normalization. */ final class MassRedirectNormalizerCommands extends DrushCommands { @@ -24,38 +28,63 @@ public function __construct( } /** - * Normalizes redirected internal links in node and paragraph content. + * Normalizes redirect-based links in nodes and paragraphs. + * + * Use --simulate (or global `drush --simulate`) to preview changes only. + * Without simulation, changes are saved. 
* * @command mass-redirect-normalizer:normalize-links * @field-labels * status: Status * entity_type: Entity Type * entity_id: Entity ID + * parent_node_id: Parent Node ID * bundle: Bundle + * field: Field + * delta: Delta + * kind: Kind + * before: URL before + * after: URL after * details: Details - * @default-fields status,entity_type,entity_id,bundle,details + * @default-fields status,entity_type,entity_id,parent_node_id,bundle,field,before,after * @aliases mnrl - * @option limit - * @option entity-type - * @option bundle - * @option show-unchanged + * @option limit Max entities per entity type (0 = no limit). + * @option entity-type Entity type: node, paragraph, or all (default). + * @option bundle Limit to this bundle / paragraph type. + * @option entity-ids Comma-separated IDs to process only (requires + * --entity-type=node or paragraph, not all). Ignores --limit. + * @option simulate Dry-run: show diffs only; do not save (same as global `drush --simulate`). + * @usage mass-redirect-normalizer:normalize-links --simulate --limit=100 + * Preview changes. Use --format=json for machine-readable output. */ - public function normalizeRedirectLinks($options = ['limit' => 0, 'entity-type' => 'all', 'bundle' => NULL, 'show-unchanged' => FALSE]): RowsOfFields { + public function normalizeRedirectLinks( + $options = [ + 'limit' => 0, + 'entity-type' => 'all', + 'bundle' => NULL, + 'entity-ids' => NULL, + 'simulate' => FALSE, + ], + ): RowsOfFields { $_ENV['MASS_FLAGGING_BYPASS'] = TRUE; $entityTypes = $options['entity-type'] === 'all' ? ['node', 'paragraph'] : [(string) $options['entity-type']]; $limit = max(0, (int) $options['limit']); - $showUnchanged = !empty($options['show-unchanged']); + $entityIdsOption = isset($options['entity-ids']) ? 
trim((string) $options['entity-ids']) : ''; try { - $simulate = Drush::simulate(); + $simulate = !empty($options['simulate']) || Drush::simulate(); } catch (\RuntimeException) { - // Allow direct invocation in PHPUnit without Drush bootstrap. - $simulate = FALSE; + // Allow PHPUnit to call this command without full Drush bootstrap. + $simulate = !empty($options['simulate']); } $rows = []; $processed = 0; - $changed = 0; - $skipped = 0; + $entitiesChanged = 0; + $fieldChanges = 0; + + if ($entityIdsOption !== '' && $options['entity-type'] === 'all') { + throw new \InvalidArgumentException('The --entity-ids option requires --entity-type=node or --entity-type=paragraph.'); + } foreach ($entityTypes as $entityType) { if (!in_array($entityType, ['node', 'paragraph'], TRUE)) { @@ -63,23 +92,34 @@ public function normalizeRedirectLinks($options = ['limit' => 0, 'entity-type' = 'status' => 'unsupported', 'entity_type' => $entityType, 'entity_id' => 'N/A', + 'parent_node_id' => '-', 'bundle' => 'N/A', + 'field' => '-', + 'delta' => '-', + 'kind' => '-', + 'before' => '-', + 'after' => '-', 'details' => 'Unsupported entity type', ]; continue; } - $idField = $entityType === 'node' ? 'nid' : 'id'; - $query = $this->entityTypeManager->getStorage($entityType)->getQuery() - ->accessCheck(FALSE) - ->sort($idField); - if (!empty($options['bundle'])) { - $query->condition('type', $options['bundle']); + if ($entityIdsOption !== '') { + $ids = array_values(array_filter(array_map('intval', preg_split('/\s*,\s*/', $entityIdsOption)))); } - if ($limit > 0) { - $query->range(0, $limit); + else { + $idField = $entityType === 'node' ? 
'nid' : 'id'; + $query = $this->entityTypeManager->getStorage($entityType)->getQuery() + ->accessCheck(FALSE) + ->sort($idField); + if (!empty($options['bundle'])) { + $query->condition('type', $options['bundle']); + } + if ($limit > 0) { + $query->range(0, $limit); + } + $ids = $query->execute(); } - $ids = $query->execute(); foreach ($ids as $id) { $entity = $this->entityTypeManager->getStorage($entityType)->load($id); @@ -87,50 +127,156 @@ public function normalizeRedirectLinks($options = ['limit' => 0, 'entity-type' = continue; } - $result = $this->normalizerManager->normalizeEntity($entity, !$simulate); - $processed++; - if (!empty($result['changed'])) { - $changed++; - $rows[] = [ - 'status' => $simulate ? 'would_update' : 'updated', - 'entity_type' => $entityType, - 'entity_id' => $id, - 'bundle' => $entity->bundle(), - 'details' => 'Redirect-based links normalized', - ]; + if (!empty($options['bundle']) && $entity->bundle() !== $options['bundle']) { + continue; } - elseif (!empty($result['skipped'])) { - $skipped++; - $rows[] = [ - 'status' => 'skipped', - 'entity_type' => $entityType, - 'entity_id' => $id, - 'bundle' => $entity->bundle(), - 'details' => 'Orphan paragraph skipped', - ]; + + // Skip orphan paragraphs. + if (Helper::isParagraphOrphan($entity)) { + continue; } - elseif ($showUnchanged) { - $rows[] = [ - 'status' => 'unchanged', - 'entity_type' => $entityType, - 'entity_id' => $id, - 'bundle' => $entity->bundle(), - 'details' => 'No redirect-based links found', - ]; + + $result = $this->normalizerManager->normalizeEntity($entity, !$simulate, $simulate); + $processed++; + if (!empty($result['changed'])) { + $entitiesChanged++; + $changes = $result['changes'] ?? []; + $fieldChanges += count($changes); + $parentNodeId = '-'; + if ($entityType === 'paragraph' && $entity instanceof Paragraph) { + $parentNode = Helper::getParentNode($entity); + $parentNodeId = $parentNode ? 
(string) $parentNode->id() : '-'; + } + foreach ($changes as $change) { + [$beforePreview, $afterPreview] = $this->buildUrlBeforeAfter( + (string) $change['kind'], + (string) $change['before'], + (string) $change['after'], + ); + $rows[] = [ + 'status' => $simulate ? 'would_update' : 'updated', + 'entity_type' => $entityType, + 'entity_id' => $id, + 'parent_node_id' => $parentNodeId, + 'bundle' => $entity->bundle(), + 'field' => $change['field'], + 'delta' => (string) $change['delta'], + 'kind' => $change['kind'], + 'before' => $beforePreview, + 'after' => $afterPreview, + 'details' => $simulate ? 'dry-run' : 'saved', + ]; + } } } } $mode = $simulate ? 'SIMULATION' : 'EXECUTION'; - $rows[] = [ - 'status' => 'summary', - 'entity_type' => 'all', - 'entity_id' => (string) $processed, - 'bundle' => 'N/A', - 'details' => "{$mode} complete. changed={$changed}; skipped={$skipped}", - ]; + if ($this->logger()) { + $this->logger()->notice((string) dt('@mode: scanned @count entities; updated: @updated; field changes: @diffs.', [ + '@mode' => $mode, + '@count' => $processed, + '@updated' => $entitiesChanged, + '@diffs' => $fieldChanges, + ])); + } return new RowsOfFields($rows); } + /** + * Builds URL-only before/after values for table output. + * + * Link fields show a readable URI/path. + * Text fields show changed href values in document order. + */ + private function buildUrlBeforeAfter(string $kind, string $before, string $after): array { + $max = 120; + if ($kind === 'link') { + return [ + $this->truncateForTable($this->formatUriForDisplay($before), $max), + $this->truncateForTable($this->formatUriForDisplay($after), $max), + ]; + } + + $beforeHrefs = $this->extractAnchorHrefs($before); + $afterHrefs = $this->extractAnchorHrefs($after); + $pairs = []; + $count = max(count($beforeHrefs), count($afterHrefs)); + for ($i = 0; $i < $count; $i++) { + $b = $beforeHrefs[$i] ?? ''; + $a = $afterHrefs[$i] ?? 
''; + if ($b !== $a) { + $pairs[] = [$b, $a]; + } + } + + if ($pairs === []) { + if ($beforeHrefs !== [] || $afterHrefs !== []) { + return [ + $this->truncateForTable($beforeHrefs[0] ?? '-', $max), + $this->truncateForTable($afterHrefs[0] ?? '-', $max), + ]; + } + return ['-', '-']; + } + + $beforeUrls = implode('; ', array_column($pairs, 0)); + $afterUrls = implode('; ', array_column($pairs, 1)); + return [ + $this->truncateForTable($beforeUrls, $max), + $this->truncateForTable($afterUrls, $max), + ]; + } + + /** + * Lists href attribute values for anchors in document order. + * + * @return array + * A list of href strings. + */ + private function extractAnchorHrefs(string $html): array { + if ($html === '') { + return []; + } + $dom = Html::load($html); + $xpath = new \DOMXPath($dom); + $hrefs = []; + foreach ($xpath->query('//a[@href]') as $anchor) { + if ($anchor instanceof \DOMElement) { + $hrefs[] = (string) $anchor->getAttribute('href'); + } + } + return $hrefs; + } + + /** + * Formats link-field URIs for CLI output. + */ + private function formatUriForDisplay(string $uri): string { + $uri = trim($uri); + if ($uri === '') { + return '-'; + } + if (str_starts_with($uri, 'internal:')) { + $rest = substr($uri, strlen('internal:')); + $path = (string) parse_url($rest, PHP_URL_PATH); + $query = (string) parse_url($rest, PHP_URL_QUERY); + $fragment = (string) parse_url($rest, PHP_URL_FRAGMENT); + $out = ($path !== '' ? $path : '/') . ($query !== '' ? '?' . $query : '') . ($fragment !== '' ? '#' . $fragment : ''); + return $out !== '' ? $out : $uri; + } + return $uri; + } + + /** + * Shortens long values for the CLI table. 
+ */ + private function truncateForTable(string $text, int $max = 72): string { + if (mb_strlen($text) <= $max) { + return $text; + } + return Unicode::truncate($text, $max, FALSE, TRUE); + } + } diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php index 703bf51461..fcbbc679ef 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -10,14 +10,17 @@ use Drupal\paragraphs\Entity\Paragraph; /** - * Normalizes redirected internal links on content entities. + * Manager class: entity processing and save flow. + * + * This class loops entity fields, calls the resolver, and decides if/when + * to save revisions. The resolver class only handles link rewrite logic. */ class RedirectLinkNormalizationManager { private const REVISION_MESSAGE = 'Revision created to normalize redirected internal links.'; private const NESTED_REVISION_MESSAGE = 'Revision created to normalize redirected internal links in nested content.'; /** - * Constructs the manager. + * Creates the manager. */ public function __construct( protected RedirectLinkResolver $resolver, @@ -26,38 +29,44 @@ public function __construct( } /** - * Processes redirect-based links in an entity. + * Normalizes redirect-based links on one entity. * * @param \Drupal\Core\Entity\ContentEntityInterface $entity * Node or paragraph entity. * @param bool $save - * Whether to persist updates. + * If TRUE, save changes and create revisions when possible. + * @param bool $dryRun + * If TRUE, only collect changes and do not write values. * * @return array - * Processing result. + * Result with keys: changed, skipped, and changes. 
*/ - public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRUE): array { + public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRUE, bool $dryRun = FALSE): array { if ($entity instanceof Paragraph && Helper::isParagraphOrphan($entity)) { - return ['changed' => FALSE, 'skipped' => TRUE]; + return ['changed' => FALSE, 'skipped' => TRUE, 'changes' => []]; } - $changed = FALSE; - foreach ($entity->getFields() as $field) { - $fieldType = $field->getFieldDefinition()->getType(); - if (in_array($fieldType, ['text_long', 'text_with_summary', 'string_long'], TRUE)) { - foreach ($field as $item) { - $changed = $this->normalizeTextItem($item, $changed); - } - } - elseif ($fieldType === 'link') { - foreach ($field as $item) { - $changed = $this->normalizeLinkItem($item, $changed); - } - } + $apply = !$dryRun; + $result = $this->collectFieldNormalizations($entity, $apply); + + if (!$result['changed']) { + return ['changed' => FALSE, 'skipped' => FALSE, 'changes' => []]; + } + + if ($dryRun) { + return [ + 'changed' => TRUE, + 'skipped' => FALSE, + 'changes' => $result['changes'], + ]; } - if (!$changed || !$save) { - return ['changed' => $changed, 'skipped' => FALSE]; + if (!$save) { + return [ + 'changed' => TRUE, + 'skipped' => FALSE, + 'changes' => $result['changes'], + ]; } $this->prepareRevision($entity, self::REVISION_MESSAGE); @@ -68,45 +77,81 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU $node->save(); } - return ['changed' => TRUE, 'skipped' => FALSE]; - } - - /** - * Normalize a text item value and return updated changed flag. 
- */ - private function normalizeTextItem(object $item, bool $changed): bool { - if (!isset($item->value) || $item->value === NULL || $item->value === '') { - return $changed; - } - - $processed = $this->resolver->normalizeRedirectLinksInText($item->value); - if ($processed['changed']) { - $item->value = $processed['text']; - return TRUE; - } - - return $changed; + return [ + 'changed' => TRUE, + 'skipped' => FALSE, + 'changes' => $result['changes'], + ]; } /** - * Normalize a link item URI and return updated changed flag. + * Scans text and link fields and updates values when needed. + * + * @return array + * An array with keys: + * - changed (bool): TRUE when at least one value changed. + * - changes (array): List of changed items (field, delta, kind, before, + * after). */ - private function normalizeLinkItem(object $item, bool $changed): bool { - if (empty($item->uri)) { - return $changed; - } + private function collectFieldNormalizations(ContentEntityInterface $entity, bool $apply): array { + $changed = FALSE; + $changes = []; - $processed = $this->resolver->normalizeRedirectLinkUri($item->uri); - if ($processed['changed']) { - $item->uri = $processed['uri']; - return TRUE; + foreach ($entity->getFields() as $fieldName => $field) { + $fieldType = $field->getFieldDefinition()->getType(); + if (in_array($fieldType, ['text_long', 'text_with_summary', 'string_long'], TRUE)) { + foreach ($field as $delta => $item) { + if (!isset($item->value) || $item->value === NULL || $item->value === '') { + continue; + } + $before = (string) $item->value; + $processed = $this->resolver->normalizeRedirectLinksInText($before); + if (!$processed['changed']) { + continue; + } + $changed = TRUE; + $changes[] = [ + 'field' => (string) $fieldName, + 'delta' => (int) $delta, + 'kind' => 'text', + 'before' => $before, + 'after' => $processed['text'], + ]; + if ($apply) { + $item->value = $processed['text']; + } + } + } + elseif ($fieldType === 'link') { + foreach ($field as $delta => 
$item) { + if (empty($item->uri)) { + continue; + } + $before = (string) $item->uri; + $processed = $this->resolver->normalizeRedirectLinkUri($before); + if (!$processed['changed']) { + continue; + } + $changed = TRUE; + $changes[] = [ + 'field' => (string) $fieldName, + 'delta' => (int) $delta, + 'kind' => 'link', + 'before' => $before, + 'after' => $processed['uri'], + ]; + if ($apply) { + $item->uri = $processed['uri']; + } + } + } } - return $changed; + return ['changed' => $changed, 'changes' => $changes]; } /** - * Configure revision metadata when supported by entity type. + * Sets revision data if the entity supports revisions. */ private function prepareRevision(ContentEntityInterface $entity, string $message): void { if ($entity instanceof RevisionableInterface) { diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php index b0cceb737b..9e6cc4708a 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php @@ -11,12 +11,15 @@ use Symfony\Component\Routing\RequestContext; /** - * Resolves and rewrites redirect-based internal links. + * Resolver class: pure link rewrite logic. + * + * This class only answers "what should this link become?". + * It does not loop entity fields and does not save entities. */ class RedirectLinkResolver { /** - * Creates a resolver instance. + * Creates the resolver. */ public function __construct( protected EntityTypeManagerInterface $entityTypeManager, @@ -27,7 +30,7 @@ public function __construct( } /** - * Normalizes redirected internal links in rich text. + * Rewrites redirect-based links in rich text. 
*/ public function normalizeRedirectLinksInText(string $text): array { $dom = Html::load($text); @@ -60,7 +63,7 @@ public function normalizeRedirectLinksInText(string $text): array { } /** - * Normalizes redirected internal links in link fields. + * Rewrites redirect-based links in link fields. */ public function normalizeRedirectLinkUri(string $uri): array { $resolved = $this->resolveRedirectTarget($uri); @@ -78,7 +81,7 @@ public function normalizeRedirectLinkUri(string $uri): array { } /** - * Resolves an internal URL/path through redirect chains. + * Follows redirect chain and returns the final local path. */ public function resolveRedirectTarget(string $url, int $maxDepth = 10): array { $parsed = parse_url($url) ?: []; diff --git a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php index f5e6618975..d2095ad41c 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php +++ b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php @@ -49,7 +49,7 @@ public function testRedirectChainNormalizationInText(): void { 'status' => 1, 'moderation_state' => 'published', ]); - [$sourceStart, $sourceFinal] = $this->createRedirectChain($target); + [$sourceStart] = $this->createRedirectChain($target); $redirectStorage = \Drupal::entityTypeManager()->getStorage('redirect'); $matching = $redirectStorage->loadByProperties([ @@ -347,6 +347,29 @@ public function testCommandOptionsEntityTypeAndBundleFiltering(): void { ], ]); + // Presave hook rewrites redirect links on first save, so the stored body no + // longer contains the redirect path. Put the redirect URL back in the DB so + // the bulk command (which loads from storage) has something to normalize. + $redirect_markup = '

Node-only

'; + $nid = (int) $page->id(); + $vid = (int) $page->getRevisionId(); + $connection = \Drupal::database(); + foreach (['node__body', 'node_revision__body'] as $table) { + $connection->update($table) + ->fields(['body_value' => $redirect_markup]) + ->condition('entity_id', $nid) + ->condition('revision_id', $vid) + ->execute(); + } + \Drupal::entityTypeManager()->getStorage('node')->resetCache([$nid]); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ + $manager = \Drupal::service('mass_redirect_normalizer.manager'); + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($nid); + $this->assertNotNull($reloaded); + $dryPreview = $manager->normalizeEntity($reloaded, FALSE, TRUE); + $this->assertNotEmpty($dryPreview['changed'], 'Dry run should detect redirect-based link in body.'); + $command = new MassRedirectNormalizerCommands( \Drupal::entityTypeManager(), \Drupal::service('mass_redirect_normalizer.manager') @@ -354,17 +377,18 @@ public function testCommandOptionsEntityTypeAndBundleFiltering(): void { $rowsObj = $command->normalizeRedirectLinks([ 'entity-type' => 'node', 'bundle' => 'page', + 'entity-ids' => (string) $page->id(), 'limit' => 0, - 'show-unchanged' => TRUE, + 'simulate' => TRUE, ]); $rows = method_exists($rowsObj, 'getArrayCopy') ? $rowsObj->getArrayCopy() : iterator_to_array($rowsObj); $this->assertNotEmpty($rows); - $nonSummaryRows = array_filter($rows, fn($row) => ($row['status'] ?? 
'') !== 'summary'); - $this->assertNotEmpty($nonSummaryRows); - foreach ($nonSummaryRows as $row) { + foreach ($rows as $row) { $this->assertSame('node', $row['entity_type']); $this->assertSame('page', $row['bundle']); + $this->assertSame('would_update', $row['status']); + $this->assertNotSame($row['before'], $row['after']); } } @@ -424,6 +448,8 @@ public function testManagerNormalizesOnlyRedirectingLinksInMultiValueField(): vo $this->assertTrue($result['changed']); $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $this->assertNotNull($reloaded); + /** @var \Drupal\node\NodeInterface $reloaded */ $links = $reloaded->get('field_social_links')->getValue(); $this->assertStringContainsString($target->toUrl()->toString(), $links[0]['uri']); @@ -474,6 +500,8 @@ public function testLinkItemMetadataIsPreservedDuringNormalization(): void { $this->assertTrue($result['changed']); $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $this->assertNotNull($reloaded); + /** @var \Drupal\node\NodeInterface $reloaded */ $item = $reloaded->get('field_social_links')->first(); $this->assertNotNull($item); $this->assertSame('keep-title', $item->title); From b703307048a9407c7862a2143c6d209ca81c3eed Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Tue, 7 Apr 2026 13:57:34 +0400 Subject: [PATCH 7/9] DP-45831 --- .../custom/mass_redirect_normalizer/README.md | 23 ++++++- .../MassRedirectNormalizerCommands.php | 67 ++++++++++++++++++- .../RedirectLinkNormalizationTest.php | 36 ++++++++++ 3 files changed, 123 insertions(+), 3 deletions(-) diff --git a/docroot/modules/custom/mass_redirect_normalizer/README.md b/docroot/modules/custom/mass_redirect_normalizer/README.md index fe9a073b08..71480e830f 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/README.md +++ b/docroot/modules/custom/mass_redirect_normalizer/README.md @@ -18,7 +18,7 @@ For each **node** or **paragraph**, the code looks at: It does **not** 
change random text; it only rewrites values the resolver treats as redirect-based internal links (see integration tests for examples). -## Mre about code +## Why there are two classes - `RedirectLinkResolver`: - Only link logic. @@ -28,7 +28,7 @@ It does **not** change random text; it only rewrites values the resolver treats - Entity workflow logic. - It loops fields on node/paragraph, calls the resolver, handles dry-run, and saves revisions when needed. -This split keeps code easier to test and easier to maintain. +This split makes the code easier to test and maintain. --- @@ -49,6 +49,13 @@ This split keeps code easier to test and easier to maintain. | `--bundle=...` | Only that bundle (node type or paragraph type machine name). Still checked after load. | | `--entity-ids=1,2,3` | Only these IDs. **Requires** `--entity-type=node` or `paragraph` (**not** `all`). Ignores `--limit`. | +By default, bulk command processes only **published** content. + +- Nodes must be published. +- Paragraphs are processed only when their parent node is published. +- If a published node has a newer unpublished draft revision, that node and its + child paragraphs are skipped by bulk command (so we do not touch draft work). + ### Default table columns | Column | Notes | @@ -64,6 +71,8 @@ This split keeps code easier to test and easier to maintain. - **Orphan paragraphs** — paragraphs that are not attached to real host content (`Helper::isParagraphOrphan()`). They are **not** processed and **do not** appear as rows. - Entities with **no** redirect-based links to fix produce **no** rows (empty table is normal). +- Unpublished/trashed content is skipped. +- Published content with newer unpublished draft revisions is skipped. ### Simulate, then run, then verify (manual QA) @@ -73,6 +82,9 @@ This split keeps code easier to test and easier to maintain. `ddev drush mass-redirect-normalizer:normalize-links --limit=100` 3. **Re-check:** run **simulate** again with the same filters. 
Items that were fixed should **not** show `would_update` anymore (unless something else changed them back). +For big runs, command prints progress notice every 100 scanned entities. This +is expected and helps confirm it is still running. + For a narrow retest after you know specific IDs: `ddev drush mass-redirect-normalizer:normalize-links --simulate --entity-type=paragraph --entity-ids=123,456` @@ -81,6 +93,13 @@ For a narrow retest after you know specific IDs: On **first save**, `hook_entity_presave()` may already rewrite links in the stored field values. So if you create test content in the UI and then expect the bulk command to “see” the old redirect URL in the database, it might already be normalized. The automated tests handle that case where needed. +Document links in entity-reference-only fields: + +- If the field stores only an entity reference (no URL/href string), this + command does not rewrite that stored reference value. +- If a document URL appears in supported text/link fields and points through a + redirect, it is covered by this command. 
+ --- ## Automated tests diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php index e71e87bcef..99eca6f909 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php @@ -8,6 +8,7 @@ use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager; use Drupal\mayflower\Helper; +use Drupal\node\NodeInterface; use Drupal\paragraphs\Entity\Paragraph; use Drush\Commands\AutowireTrait; use Drush\Commands\DrushCommands; @@ -81,6 +82,7 @@ public function normalizeRedirectLinks( $processed = 0; $entitiesChanged = 0; $fieldChanges = 0; + $progressEvery = 100; if ($entityIdsOption !== '' && $options['entity-type'] === 'all') { throw new \InvalidArgumentException('The --entity-ids option requires --entity-type=node or --entity-type=paragraph.'); @@ -132,12 +134,25 @@ public function normalizeRedirectLinks( } // Skip orphan paragraphs. - if (Helper::isParagraphOrphan($entity)) { + if ($entityType === 'paragraph' && Helper::isParagraphOrphan($entity)) { + continue; + } + + if (!$this->isEntityEligibleForNormalization($entityType, $entity)) { continue; } $result = $this->normalizerManager->normalizeEntity($entity, !$simulate, $simulate); $processed++; + if ($this->logger() && $processed % $progressEvery === 0) { + $this->logger()->notice((string) dt('Progress: scanned @count entities; updated @updated; field changes @diffs. Last @type:@id', [ + '@count' => $processed, + '@updated' => $entitiesChanged, + '@diffs' => $fieldChanges, + '@type' => $entityType, + '@id' => $id, + ])); + } if (!empty($result['changed'])) { $entitiesChanged++; $changes = $result['changes'] ?? 
[]; @@ -184,6 +199,56 @@ public function normalizeRedirectLinks( return new RowsOfFields($rows); } + /** + * Checks if this entity should be processed by bulk normalization. + * + * Bulk command targets published content only and skips nodes/paragraphs when + * the parent node has a newer unpublished draft revision. + */ + private function isEntityEligibleForNormalization(string $entityType, object $entity): bool { + if ($entityType === 'node') { + if (!$entity instanceof NodeInterface) { + return FALSE; + } + if (!$entity->isPublished()) { + return FALSE; + } + return !$this->hasNewerUnpublishedDraft($entity); + } + + if ($entityType === 'paragraph') { + if (!$entity instanceof Paragraph) { + return FALSE; + } + $parentNode = Helper::getParentNode($entity); + if (!$parentNode instanceof NodeInterface || !$parentNode->isPublished()) { + return FALSE; + } + return !$this->hasNewerUnpublishedDraft($parentNode); + } + + return FALSE; + } + + /** + * Returns TRUE when latest node revision is unpublished and newer. + */ + private function hasNewerUnpublishedDraft(NodeInterface $node): bool { + $storage = $this->entityTypeManager->getStorage('node'); + $latestRevisionId = $storage->getLatestRevisionId($node->id()); + if (!$latestRevisionId || (int) $latestRevisionId === (int) $node->getRevisionId()) { + return FALSE; + } + + $revisions = $storage->loadMultipleRevisions([(int) $latestRevisionId]); + $latest = $revisions[(int) $latestRevisionId] ?? NULL; + if (!$latest instanceof NodeInterface) { + return FALSE; + } + + return !$latest->isPublished(); + } + /** * Builds URL-only before/after values for table output. 
* diff --git a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php index d2095ad41c..6126742003 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php +++ b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php @@ -392,6 +392,42 @@ public function testCommandOptionsEntityTypeAndBundleFiltering(): void { } } + /** + * Tests command skips unpublished nodes. + */ + public function testCommandSkipsUnpublishedNode(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $unpublished = $this->createNode([ + 'type' => 'page', + 'title' => $this->randomMachineName(), + 'status' => 0, + 'moderation_state' => 'draft', + 'body' => [ + 'value' => '

Unpublished node

', + 'format' => 'full_html', + ], + ]); + + $command = new MassRedirectNormalizerCommands( + \Drupal::entityTypeManager(), + \Drupal::service('mass_redirect_normalizer.manager') + ); + $rowsObj = $command->normalizeRedirectLinks([ + 'entity-type' => 'node', + 'entity-ids' => (string) $unpublished->id(), + 'simulate' => TRUE, + ]); + $rows = method_exists($rowsObj, 'getArrayCopy') ? $rowsObj->getArrayCopy() : iterator_to_array($rowsObj); + $this->assertSame([], $rows); + } + /** * Tests absolute local URL link-field normalization. */ From edc86a489ee551974bbf9904f77bf318c2971ad8 Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Tue, 7 Apr 2026 13:59:27 +0400 Subject: [PATCH 8/9] DP-45831 --- .../RedirectLinkNormalizationTest.php | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php index 6126742003..7afd86dc90 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php +++ b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php @@ -287,6 +287,56 @@ public function testAliasTargetWithoutNodeDoesNotAddEntityMetadata(): void { $this->assertStringNotContainsString('data-entity-uuid=', $normalized['text']); } + /** + * Tests redirected document links are rewritten in rich text. + */ + public function testDocumentRedirectIsNormalizedInText(): void { + $source = 'doc-source-' . 
$this->randomMachineName(); + $target = '/sites/default/files/documents/example.pdf'; + + $redirect = Redirect::create(); + $redirect->setSource($source); + $redirect->setRedirect($target); + $redirect->setLanguage('en'); + $redirect->setStatusCode(301); + $redirect->save(); + $this->cleanupEntities[] = $redirect; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $text = '

Doc link

'; + $normalized = $service->normalizeRedirectLinksInText($text); + + $this->assertTrue($normalized['changed']); + $this->assertStringContainsString($target . '?dl=1#frag', $normalized['text']); + // Document targets are not node canonical paths, so node metadata is absent. + $this->assertStringNotContainsString('data-entity-type="node"', $normalized['text']); + $this->assertStringNotContainsString('data-entity-uuid=', $normalized['text']); + } + + /** + * Tests redirected document links are rewritten in link fields. + */ + public function testDocumentRedirectIsNormalizedInLinkField(): void { + $source = 'doc-link-source-' . $this->randomMachineName(); + $target = '/sites/default/files/documents/example-2.pdf'; + + $redirect = Redirect::create(); + $redirect->setSource($source); + $redirect->setRedirect($target); + $redirect->setLanguage('en'); + $redirect->setStatusCode(301); + $redirect->save(); + $this->cleanupEntities[] = $redirect; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $normalized = $service->normalizeRedirectLinkUri('internal:/' . $source . '?download=1#part'); + + $this->assertTrue($normalized['changed']); + $this->assertSame('internal:' . $target . '?download=1#part', $normalized['uri']); + } + /** * Tests manager idempotency after first normalization. 
*/ From bb0c60a07113cb984f623e525db1bd02cd15ca7b Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Tue, 7 Apr 2026 14:18:26 +0400 Subject: [PATCH 9/9] DP-45831 --- .../custom/mass_redirect_normalizer/README.md | 11 +- .../MassRedirectNormalizerCommands.php | 100 ++++++++++-------- .../RedirectLinkNormalizationTest.php | 7 +- 3 files changed, 63 insertions(+), 55 deletions(-) diff --git a/docroot/modules/custom/mass_redirect_normalizer/README.md b/docroot/modules/custom/mass_redirect_normalizer/README.md index 71480e830f..1d53a4f1e8 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/README.md +++ b/docroot/modules/custom/mass_redirect_normalizer/README.md @@ -44,10 +44,9 @@ This split makes the code easier to test and maintain. | Option | Meaning | |--------|---------| | `--simulate` | Dry run: **no** database writes. Same idea as global `ddev drush --simulate ...`. | -| `--limit=N` | Max entities **per entity type** to load from the query. **`0` = no limit.** When `--entity-type=all`, you get up to **N nodes** and up to **N paragraphs** (two separate caps). | -| `--entity-type=node\|paragraph\|all` | Default **`all`** (nodes and paragraphs). | +| `--limit=N` | Max eligible entities to process **total** across node + paragraph. Command stops when it reaches `N`. **`0` = no limit.** | | `--bundle=...` | Only that bundle (node type or paragraph type machine name). Still checked after load. | -| `--entity-ids=1,2,3` | Only these IDs. **Requires** `--entity-type=node` or `paragraph` (**not** `all`). Ignores `--limit`. | +| `--entity-ids=1,2,3` | Only these IDs. IDs are checked in both node and paragraph entities. Ignores `--limit`. | By default, bulk command processes only **published** content. @@ -82,12 +81,12 @@ By default, bulk command processes only **published** content. `ddev drush mass-redirect-normalizer:normalize-links --limit=100` 3. **Re-check:** run **simulate** again with the same filters.
Items that were fixed should **not** show `would_update` anymore (unless something else changed them back). -For big runs, command prints progress notice every 100 scanned entities. This +For big runs, command prints progress notice every 100 processed entities. This is expected and helps confirm it is still running. For a narrow retest after you know specific IDs: -`ddev drush mass-redirect-normalizer:normalize-links --simulate --entity-type=paragraph --entity-ids=123,456` +`ddev drush mass-redirect-normalizer:normalize-links --simulate --entity-ids=123,456` ### Important detail about saved content @@ -128,7 +127,7 @@ ddev exec ./vendor/bin/phpunit docroot/modules/custom/mass_redirect_normalizer/t - Multi-value link field handling (only redirecting values change). - Link item metadata preservation (`title`, `options`). - Drush command behavior: - - Entity type and bundle filters. + - Bundle filter. - Targeted runs with `--entity-ids`. - Simulate mode row output (`would_update`) and URL before/after columns. diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php index 99eca6f909..67a09def96 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php @@ -49,11 +49,10 @@ public function __construct( * details: Details * @default-fields status,entity_type,entity_id,parent_node_id,bundle,field,before,after * @aliases mnrl - * @option limit Max entities per entity type (0 = no limit). - * @option entity-type Entity type: node, paragraph, or all (default). + * @option limit Max eligible entities to process total (0 = no limit). * @option bundle Limit to this bundle / paragraph type. 
- * @option entity-ids Comma-separated IDs to process only (requires - * --entity-type=node or paragraph, not all). Ignores --limit. + * @option entity-ids Comma-separated IDs to process only. IDs are checked + * against both node and paragraph entities. Ignores --limit. * @option simulate Dry-run: show diffs only; do not save (same as global `drush --simulate`). * @usage mass-redirect-normalizer:normalize-links --simulate --limit=100 * Preview changes. Use --format=json for machine-readable output. @@ -61,15 +60,14 @@ public function __construct( public function normalizeRedirectLinks( $options = [ 'limit' => 0, - 'entity-type' => 'all', 'bundle' => NULL, 'entity-ids' => NULL, 'simulate' => FALSE, ], ): RowsOfFields { $_ENV['MASS_FLAGGING_BYPASS'] = TRUE; - $entityTypes = $options['entity-type'] === 'all' ? ['node', 'paragraph'] : [(string) $options['entity-type']]; - $limit = max(0, (int) $options['limit']); + $entityTypes = ['node', 'paragraph']; + $limit = max(0, (int) ($options['limit'] ?? 0)); $entityIdsOption = isset($options['entity-ids']) ? 
trim((string) $options['entity-ids']) : ''; try { $simulate = !empty($options['simulate']) || Drush::simulate(); @@ -81,31 +79,12 @@ public function normalizeRedirectLinks( $rows = []; $processed = 0; $entitiesChanged = 0; - $fieldChanges = 0; + $valueUpdates = 0; $progressEvery = 100; - - if ($entityIdsOption !== '' && $options['entity-type'] === 'all') { - throw new \InvalidArgumentException('The --entity-ids option requires --entity-type=node or --entity-type=paragraph.'); - } + $nodePublishedCache = []; + $newerDraftCache = []; foreach ($entityTypes as $entityType) { - if (!in_array($entityType, ['node', 'paragraph'], TRUE)) { - $rows[] = [ - 'status' => 'unsupported', - 'entity_type' => $entityType, - 'entity_id' => 'N/A', - 'parent_node_id' => '-', - 'bundle' => 'N/A', - 'field' => '-', - 'delta' => '-', - 'kind' => '-', - 'before' => '-', - 'after' => '-', - 'details' => 'Unsupported entity type', - ]; - continue; - } - if ($entityIdsOption !== '') { $ids = array_values(array_filter(array_map('intval', preg_split('/\s*,\s*/', $entityIdsOption)))); } @@ -124,6 +103,10 @@ public function normalizeRedirectLinks( } foreach ($ids as $id) { + if ($limit > 0 && $processed >= $limit) { + break 2; + } + $entity = $this->entityTypeManager->getStorage($entityType)->load($id); if (!$entity) { continue; @@ -138,17 +121,22 @@ public function normalizeRedirectLinks( continue; } - if (!$this->isEntityEligibleForNormalization($entityType, $entity)) { + if (!$this->isEntityEligibleForNormalization( + $entityType, + $entity, + $nodePublishedCache, + $newerDraftCache, + )) { continue; } $result = $this->normalizerManager->normalizeEntity($entity, !$simulate, $simulate); $processed++; if ($this->logger() && $processed % $progressEvery === 0) { - $this->logger()->notice((string) dt('Progress: scanned @count entities; updated @updated; field changes @diffs. 
Last @type:@id', [ + $this->logger()->notice((string) dt('Progress: processed @count entities; updated @updated; value updates @diffs. Last @type:@id', [ '@count' => $processed, '@updated' => $entitiesChanged, - '@diffs' => $fieldChanges, + '@diffs' => $valueUpdates, '@type' => $entityType, '@id' => $id, ])); @@ -156,7 +144,7 @@ public function normalizeRedirectLinks( if (!empty($result['changed'])) { $entitiesChanged++; $changes = $result['changes'] ?? []; - $fieldChanges += count($changes); + $valueUpdates += count($changes); $parentNodeId = '-'; if ($entityType === 'paragraph' && $entity instanceof Paragraph) { $parentNode = Helper::getParentNode($entity); @@ -188,11 +176,13 @@ public function normalizeRedirectLinks( $mode = $simulate ? 'SIMULATION' : 'EXECUTION'; if ($this->logger()) { - $this->logger()->notice((string) dt('@mode: scanned @count entities; updated: @updated; field changes: @diffs.', [ + $limitText = $limit > 0 ? (string) $limit : 'none'; + $this->logger()->notice((string) dt('@mode: processed @count entities (limit: @limit); updated entities: @updated; value updates: @diffs.', [ '@mode' => $mode, '@count' => $processed, + '@limit' => $limitText, '@updated' => $entitiesChanged, - '@diffs' => $fieldChanges, + '@diffs' => $valueUpdates, ])); } @@ -205,15 +195,23 @@ public function normalizeRedirectLinks( * Bulk command targets published content only and skips nodes/paragraphs when * the parent node has a newer unpublished draft revision. */ - private function isEntityEligibleForNormalization(string $entityType, object $entity): bool { + private function isEntityEligibleForNormalization( + string $entityType, + object $entity, + array &$nodePublishedCache, + array &$newerDraftCache, + ): bool { if ($entityType === 'node') { if (!$entity instanceof NodeInterface) { return FALSE; } - if (!$entity->isPublished()) { + $nodeId = (int) $entity->id(); + $isPublished = $nodePublishedCache[$nodeId] ?? 
$entity->isPublished(); + $nodePublishedCache[$nodeId] = $isPublished; + if (!$isPublished) { return FALSE; } - return !$this->hasNewerUnpublishedDraft($entity); + return !$this->hasNewerUnpublishedDraft($entity, $newerDraftCache); } if ($entityType === 'paragraph') { @@ -221,10 +219,16 @@ private function isEntityEligibleForNormalization(string $entityType, object $en return FALSE; } $parentNode = Helper::getParentNode($entity); - if (!$parentNode instanceof NodeInterface || !$parentNode->isPublished()) { + if (!$parentNode instanceof NodeInterface) { + return FALSE; + } + $parentNodeId = (int) $parentNode->id(); + $parentPublished = $nodePublishedCache[$parentNodeId] ?? $parentNode->isPublished(); + $nodePublishedCache[$parentNodeId] = $parentPublished; + if (!$parentPublished) { return FALSE; } - return !$this->hasNewerUnpublishedDraft($parentNode); + return !$this->hasNewerUnpublishedDraft($parentNode, $newerDraftCache); } return FALSE; @@ -233,20 +237,28 @@ private function isEntityEligibleForNormalization(string $entityType, object $en /** * Returns TRUE when latest node revision is unpublished and newer. */ - private function hasNewerUnpublishedDraft(NodeInterface $node): bool { + private function hasNewerUnpublishedDraft(NodeInterface $node, array &$cache): bool { + $nodeId = (int) $node->id(); + if (array_key_exists($nodeId, $cache)) { + return $cache[$nodeId]; + } + $storage = $this->entityTypeManager->getStorage('node'); $latestRevisionId = $storage->getLatestRevisionId($node->id()); if (!$latestRevisionId || (int) $latestRevisionId === (int) $node->getRevisionId()) { - return FALSE; + $cache[$nodeId] = FALSE; + return $cache[$nodeId]; } $revisions = $storage->loadMultipleRevisions([(int) $latestRevisionId]); $latest = $revisions[(int) $latestRevisionId] ?? 
NULL; if (!$latest instanceof NodeInterface) { - return FALSE; + $cache[$nodeId] = FALSE; + return $cache[$nodeId]; } - return !$latest->isPublished(); + $cache[$nodeId] = !$latest->isPublished(); + return $cache[$nodeId]; } /** diff --git a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php index 7afd86dc90..e76e55b119 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php +++ b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php @@ -375,9 +375,9 @@ public function testManagerIsIdempotentAfterNormalization(): void { } /** - * Tests command options constrain output by entity type and bundle. + * Tests command bundle filter constrains output. */ - public function testCommandOptionsEntityTypeAndBundleFiltering(): void { + public function testCommandBundleFiltering(): void { $target = $this->createNode([ 'type' => 'org_page', 'title' => $this->randomMachineName(), @@ -425,7 +425,6 @@ public function testCommandOptionsEntityTypeAndBundleFiltering(): void { \Drupal::service('mass_redirect_normalizer.manager') ); $rowsObj = $command->normalizeRedirectLinks([ - 'entity-type' => 'node', 'bundle' => 'page', 'entity-ids' => (string) $page->id(), 'limit' => 0, @@ -435,7 +434,6 @@ public function testCommandOptionsEntityTypeAndBundleFiltering(): void { $this->assertNotEmpty($rows); foreach ($rows as $row) { - $this->assertSame('node', $row['entity_type']); $this->assertSame('page', $row['bundle']); $this->assertSame('would_update', $row['status']); $this->assertNotSame($row['before'], $row['after']); @@ -470,7 +468,6 @@ public function testCommandSkipsUnpublishedNode(): void { \Drupal::service('mass_redirect_normalizer.manager') ); $rowsObj = $command->normalizeRedirectLinks([ - 'entity-type' => 'node', 
'entity-ids' => (string) $unpublished->id(), 'simulate' => TRUE, ]);