From 530dea064ab1fd82a64753c3474ce4c5878f3af3 Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Wed, 1 Apr 2026 10:12:27 +0400 Subject: [PATCH 1/7] DP-45831: Update internal links on mass.gov that are redirects --- conf/drupal/config/core.extension.yml | 1 + .../custom/mass_redirect_normalizer/README.md | 30 ++ .../mass_redirect_normalizer.info.yml | 9 + .../mass_redirect_normalizer.module | 20 + .../mass_redirect_normalizer.services.yml | 12 + .../MassRedirectNormalizerCommands.php | 137 +++++ .../src/RedirectLinkNormalizationManager.php | 101 ++++ .../src/RedirectLinkResolver.php | 208 ++++++++ .../RedirectLinkNormalizationTest.php | 485 ++++++++++++++++++ 9 files changed, 1003 insertions(+) create mode 100644 docroot/modules/custom/mass_redirect_normalizer/README.md create mode 100644 docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml create mode 100644 docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module create mode 100644 docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.services.yml create mode 100644 docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php create mode 100644 docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php create mode 100644 docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php create mode 100644 docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php diff --git a/conf/drupal/config/core.extension.yml b/conf/drupal/config/core.extension.yml index fedd4e098a..777f373864 100644 --- a/conf/drupal/config/core.extension.yml +++ b/conf/drupal/config/core.extension.yml @@ -145,6 +145,7 @@ module: mass_microsites: 0 mass_more_lists: 0 mass_nav: 0 + mass_redirect_normalizer: 0 mass_redirects: 0 mass_scheduled_transitions: 0 mass_schema_apply_action: 0 diff --git 
a/docroot/modules/custom/mass_redirect_normalizer/README.md b/docroot/modules/custom/mass_redirect_normalizer/README.md new file mode 100644 index 0000000000..3d17b30d03 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/README.md @@ -0,0 +1,30 @@ +# Redirect Link Normalizer + +This module rewrites internal links that currently rely on redirects so they +point to the final destination path directly. For rich text links, the process +also adds `data-entity-*` attributes when the final target resolves to a node. + +## Manual execution + +- Dry run: + - `ddev drush --simulate mass-redirect-normalizer:normalize-links --limit=500` +- Execute: + - `ddev drush mass-redirect-normalizer:normalize-links --limit=5000` +- Optional filters: + - `--entity-type=node|paragraph|all` + - `--bundle=<bundle>` + - `--show-unchanged` + +## Periodic execution + +For one-time/periodic bulk cleanup, run the Drush command above. + +For ongoing maintenance, this module also normalizes links during entity save +via `hook_entity_presave()` for nodes and paragraphs. This means new edits +automatically store final target paths instead of redirecting paths. + +## Post-run usage refresh + +For large backfills, regenerate entity usage to refresh orphan reports: + +- `ddev drush mass-content:usage-regenerate --batch-size=1000` diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml new file mode 100644 index 0000000000..362c28d93b --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml @@ -0,0 +1,9 @@ +name: Mass Redirect Normalizer +type: module +description: Normalize internal links that point at redirects to their final targets. 
+core_version_requirement: ^10 || ^11 +package: Custom +dependencies: + - mass_fields:mass_fields + - mass_content:mass_content + diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module new file mode 100644 index 0000000000..73926a9569 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module @@ -0,0 +1,20 @@ +normalizeEntity($entity, FALSE); +} + diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.services.yml b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.services.yml new file mode 100644 index 0000000000..3c60356b95 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.services.yml @@ -0,0 +1,12 @@ +services: + mass_redirect_normalizer.resolver: + class: Drupal\mass_redirect_normalizer\RedirectLinkResolver + arguments: ['@entity_type.manager', '@path_alias.manager', '@request_stack', '@router.request_context'] + + mass_redirect_normalizer.manager: + class: Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager + arguments: ['@mass_redirect_normalizer.resolver', '@datetime.time'] + + Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager: + alias: mass_redirect_normalizer.manager + diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php new file mode 100644 index 0000000000..919a9c0a88 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php @@ -0,0 +1,137 @@ + 0, 'entity-type' => 'all', 'bundle' => NULL, 'show-unchanged' => FALSE]): RowsOfFields { + $_ENV['MASS_FLAGGING_BYPASS'] = TRUE; + $entityTypes = $options['entity-type'] === 'all' ? 
['node', 'paragraph'] : [(string) $options['entity-type']]; + $limit = max(0, (int) $options['limit']); + $showUnchanged = !empty($options['show-unchanged']); + try { + $simulate = Drush::simulate(); + } + catch (\RuntimeException) { + // Allow direct invocation in PHPUnit without Drush bootstrap. + $simulate = FALSE; + } + $rows = []; + $processed = 0; + $changed = 0; + $skipped = 0; + + foreach ($entityTypes as $entityType) { + if (!in_array($entityType, ['node', 'paragraph'], TRUE)) { + $rows[] = [ + 'status' => 'unsupported', + 'entity_type' => $entityType, + 'entity_id' => 'N/A', + 'bundle' => 'N/A', + 'details' => 'Unsupported entity type', + ]; + continue; + } + + $idField = $entityType === 'node' ? 'nid' : 'id'; + $query = $this->entityTypeManager->getStorage($entityType)->getQuery() + ->accessCheck(FALSE) + ->sort($idField); + if (!empty($options['bundle'])) { + $query->condition('type', $options['bundle']); + } + if ($limit > 0) { + $query->range(0, $limit); + } + $ids = $query->execute(); + + foreach ($ids as $id) { + $entity = $this->entityTypeManager->getStorage($entityType)->load($id); + if (!$entity) { + continue; + } + + $result = $this->normalizerManager->normalizeEntity($entity, !$simulate); + $processed++; + if (!empty($result['changed'])) { + $changed++; + $rows[] = [ + 'status' => $simulate ? 'would_update' : 'updated', + 'entity_type' => $entityType, + 'entity_id' => $id, + 'bundle' => $entity->bundle(), + 'details' => 'Redirect-based links normalized', + ]; + } + elseif (!empty($result['skipped'])) { + $skipped++; + $rows[] = [ + 'status' => 'skipped', + 'entity_type' => $entityType, + 'entity_id' => $id, + 'bundle' => $entity->bundle(), + 'details' => 'Orphan paragraph skipped', + ]; + } + elseif ($showUnchanged) { + $rows[] = [ + 'status' => 'unchanged', + 'entity_type' => $entityType, + 'entity_id' => $id, + 'bundle' => $entity->bundle(), + 'details' => 'No redirect-based links found', + ]; + } + } + } + + $mode = $simulate ? 
'SIMULATION' : 'EXECUTION'; + $rows[] = [ + 'status' => 'summary', + 'entity_type' => 'all', + 'entity_id' => (string) $processed, + 'bundle' => 'N/A', + 'details' => "{$mode} complete. changed={$changed}; skipped={$skipped}", + ]; + + return new RowsOfFields($rows); + } + +} + diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php new file mode 100644 index 0000000000..50e573c746 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -0,0 +1,101 @@ + FALSE, 'skipped' => TRUE]; + } + + $changed = FALSE; + foreach ($entity->getFields() as $field) { + $fieldType = $field->getFieldDefinition()->getType(); + if (in_array($fieldType, ['text_long', 'text_with_summary', 'string_long'], TRUE)) { + foreach ($field as $item) { + if (!isset($item->value) || $item->value === NULL || $item->value === '') { + continue; + } + $processed = $this->resolver->normalizeRedirectLinksInText($item->value); + if ($processed['changed']) { + $item->value = $processed['text']; + $changed = TRUE; + } + } + } + elseif ($fieldType === 'link') { + foreach ($field as $item) { + if (empty($item->uri)) { + continue; + } + $processed = $this->resolver->normalizeRedirectLinkUri($item->uri); + if ($processed['changed']) { + $item->uri = $processed['uri']; + $changed = TRUE; + } + } + } + } + + if (!$changed || !$save) { + return ['changed' => $changed, 'skipped' => FALSE]; + } + + if (method_exists($entity, 'setNewRevision')) { + call_user_func([$entity, 'setNewRevision']); + } + if (method_exists($entity, 'setRevisionLogMessage')) { + call_user_func([$entity, 'setRevisionLogMessage'], 'Revision created to normalize redirected internal links.'); + } + if (method_exists($entity, 'setRevisionCreationTime')) { + call_user_func([$entity, 'setRevisionCreationTime'], $this->time->getRequestTime()); + } + 
$entity->save(); + + if ($entity->getEntityTypeId() === 'paragraph' && $node = Helper::getParentNode($entity)) { + if (method_exists($node, 'setNewRevision')) { + $node->setNewRevision(); + } + if (method_exists($node, 'setRevisionLogMessage')) { + $node->setRevisionLogMessage('Revision created to normalize redirected internal links in nested content.'); + } + if (method_exists($node, 'setRevisionCreationTime')) { + $node->setRevisionCreationTime($this->time->getRequestTime()); + } + $node->save(); + } + + return ['changed' => TRUE, 'skipped' => FALSE]; + } + +} + diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php new file mode 100644 index 0000000000..890eddb4e8 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php @@ -0,0 +1,208 @@ +query('//a[@href]') as $anchor) { + if (!$anchor instanceof \DOMElement) { + continue; + } + $href = (string) $anchor->getAttribute('href'); + $resolved = $this->resolveRedirectTarget($href); + if (!$resolved['changed']) { + continue; + } + + $anchor->setAttribute('href', $resolved['target_path']); + if (!empty($resolved['node'])) { + $anchor->setAttribute('data-entity-uuid', $resolved['node']->uuid()); + $anchor->setAttribute('data-entity-substitution', 'canonical'); + $anchor->setAttribute('data-entity-type', 'node'); + } + $changed = TRUE; + } + + return [ + 'changed' => $changed, + 'text' => Html::serialize($dom), + ]; + } + + /** + * Normalizes redirected internal links in link fields. + */ + public function normalizeRedirectLinkUri(string $uri): array { + $resolved = $this->resolveRedirectTarget($uri); + if (!$resolved['changed']) { + return [ + 'changed' => FALSE, + 'uri' => $uri, + ]; + } + + return [ + 'changed' => TRUE, + 'uri' => 'internal:' . $resolved['target_path'], + ]; + } + + /** + * Resolves an internal URL/path through redirect chains. 
+ */ + public function resolveRedirectTarget(string $url, int $maxDepth = 10): array { + $parsed = parse_url($url) ?: []; + $sourcePath = $this->extractLocalPath($url); + if (!$sourcePath) { + return ['changed' => FALSE]; + } + + $query = empty($parsed['query']) ? '' : '?' . $parsed['query']; + $fragment = empty($parsed['fragment']) ? '' : '#' . $parsed['fragment']; + + $current = ltrim($sourcePath, '/'); + $visited = []; + + for ($i = 0; $i < $maxDepth; $i++) { + if (isset($visited[$current])) { + break; + } + $visited[$current] = TRUE; + + $redirect = $this->loadRedirectBySourcePath($current); + if (!$redirect instanceof Redirect) { + break; + } + + $next = $this->extractLocalPath($redirect->getRedirectUrl()->toString()); + if (!$next) { + break; + } + $current = ltrim($next, '/'); + } + + $finalPath = '/' . ltrim($current, '/'); + $targetPath = $finalPath . $query . $fragment; + $sourceNormalized = '/' . ltrim($sourcePath, '/') . $query . $fragment; + if ($targetPath === $sourceNormalized) { + return ['changed' => FALSE]; + } + + $node = NULL; + $internalPath = $this->pathAliasManager->getPathByAlias($finalPath); + if (preg_match('/^\/node\/(\d+)$/', $internalPath, $matches)) { + $node = $this->entityTypeManager->getStorage('node')->load((int) $matches[1]); + } + + return [ + 'changed' => TRUE, + 'target_path' => $targetPath, + 'node' => $node, + ]; + } + + /** + * Extracts local path from URL/URI; returns NULL for non-local hosts. + */ + private function extractLocalPath(string $url): ?string { + if (str_starts_with($url, 'internal:')) { + $path = (string) parse_url(substr($url, strlen('internal:')), PHP_URL_PATH); + return '/' . ltrim($path, '/'); + } + + if (str_starts_with($url, '/')) { + $path = (string) parse_url($url, PHP_URL_PATH); + return '/' . ltrim($path, '/'); + } + + if (!UrlHelper::isExternal($url)) { + $path = (string) parse_url($url, PHP_URL_PATH); + return '/' . 
ltrim($path, '/'); + } + + $parts = parse_url($url); + $host = strtolower((string) ($parts['host'] ?? '')); + $knownHosts = ['mass.gov', 'www.mass.gov']; + if ($this->requestStack->getCurrentRequest()) { + $knownHosts[] = strtolower((string) $this->requestStack->getCurrentRequest()->getHost()); + } + $requestContextHost = strtolower((string) $this->requestContext->getHost()); + if ($requestContextHost !== '') { + $knownHosts[] = $requestContextHost; + } + if (!in_array($host, array_filter($knownHosts), TRUE)) { + return NULL; + } + + $path = $parts['path'] ?? '/'; + return '/' . ltrim((string) $path, '/'); + } + + /** + * Loads redirect by source path, tolerating leading slash differences. + */ + private function loadRedirectBySourcePath(string $sourcePath): ?Redirect { + $sourcePath = trim($sourcePath); + if ($sourcePath === '') { + return NULL; + } + + $candidates = [ + ltrim($sourcePath, '/'), + '/' . ltrim($sourcePath, '/'), + ]; + + $storage = $this->entityTypeManager->getStorage('redirect'); + foreach ($candidates as $candidate) { + $query = $storage->getQuery() + ->accessCheck(FALSE) + ->range(0, 1); + $group = $query->orConditionGroup() + ->condition('redirect_source.path', $candidate) + ->condition('redirect_source__path', $candidate); + $ids = $query->condition($group)->execute(); + if (!$ids) { + continue; + } + + $redirect = $storage->load((int) reset($ids)); + if ($redirect instanceof Redirect) { + return $redirect; + } + } + + return NULL; + } + +} + diff --git a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php new file mode 100644 index 0000000000..7a021b4ab2 --- /dev/null +++ b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php @@ -0,0 +1,485 @@ +randomMachineName(); + $sourceFinal = 'chain-final-' . 
$this->randomMachineName(); + + $secondHop = Redirect::create(); + $secondHop->setRedirect('node/' . $target->id()); + $secondHop->setSource($sourceFinal); + $secondHop->setLanguage($target->language()->getId()); + $secondHop->setStatusCode(\Drupal::config('redirect.settings')->get('default_status_code')); + $secondHop->save(); + $this->cleanupEntities[] = $secondHop; + + $firstHop = Redirect::create(); + $firstHop->setRedirect('/' . $sourceFinal); + $firstHop->setSource($sourceStart); + $firstHop->setLanguage($target->language()->getId()); + $firstHop->setStatusCode(\Drupal::config('redirect.settings')->get('default_status_code')); + $firstHop->save(); + $this->cleanupEntities[] = $firstHop; + + return [$sourceStart, $sourceFinal]; + } + + /** + * Tests redirect chain resolution and rich-text rewriting. + */ + public function testRedirectChainNormalizationInText(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart, $sourceFinal] = $this->createRedirectChain($target); + + $redirectStorage = \Drupal::entityTypeManager()->getStorage('redirect'); + $matching = $redirectStorage->loadByProperties([ + 'redirect_source__path' => $sourceStart, + ]); + $this->assertNotEmpty($matching); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $resolved = $service->resolveRedirectTarget('/' . $sourceStart . '?foo=1#bar'); + $targetPath = $target->toUrl()->toString(); + $this->assertTrue($resolved['changed']); + $this->assertStringContainsString($targetPath, $resolved['target_path']); + $this->assertStringContainsString('?foo=1', $resolved['target_path']); + $this->assertStringContainsString('#bar', $resolved['target_path']); + $this->assertNotEmpty($resolved['node']); + $this->assertEquals($target->id(), $resolved['node']->id()); + + $html = '

Test link

'; + $normalized = $service->normalizeRedirectLinksInText($html); + $this->assertTrue($normalized['changed']); + $this->assertStringContainsString($targetPath, $normalized['text']); + $this->assertStringContainsString('data-entity-type="node"', $normalized['text']); + $this->assertStringContainsString('data-entity-substitution="canonical"', $normalized['text']); + $this->assertStringContainsString('data-entity-uuid="' . $target->uuid() . '"', $normalized['text']); + } + + /** + * Tests link-field URI normalization to final internal path. + */ + public function testNormalizeRedirectLinkUri(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $normalized = $service->normalizeRedirectLinkUri('internal:/' . $sourceStart . '?x=1#frag'); + $this->assertTrue($normalized['changed']); + $this->assertStringStartsWith('internal:/', $normalized['uri']); + $this->assertStringContainsString($target->toUrl()->toString(), $normalized['uri']); + $this->assertStringContainsString('?x=1', $normalized['uri']); + $this->assertStringContainsString('#frag', $normalized['uri']); + } + + /** + * Tests presave hook normalizes node rich-text links on save. + */ + public function testPresaveHookNormalizesNodeBodyOnSave(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $sourceNode = $this->createNode([ + 'type' => 'page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'body' => [ + 'value' => '

Normalize me

', + 'format' => 'full_html', + ], + ]); + + // Trigger presave normalization on node save. + $sourceNode->save(); + + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($sourceNode->id()); + $this->assertNotNull($reloaded); + /** @var \Drupal\node\NodeInterface $reloaded */ + $body = (string) $reloaded->get('body')->value; + $this->assertStringContainsString($target->toUrl()->toString(), $body); + $this->assertStringContainsString('data-entity-type="node"', $body); + } + + /** + * Tests looped redirects do not cause infinite processing. + */ + public function testRedirectLoopIsSafelyIgnored(): void { + $loopA = 'loop-a-' . $this->randomMachineName(); + $loopB = 'loop-b-' . $this->randomMachineName(); + + $a = Redirect::create(); + $a->setRedirect('/' . $loopB); + $a->setSource($loopA); + $a->setLanguage('en'); + $a->setStatusCode(\Drupal::config('redirect.settings')->get('default_status_code')); + $a->save(); + $this->cleanupEntities[] = $a; + + $b = Redirect::create(); + $b->setRedirect('/' . $loopA); + $b->setSource($loopB); + $b->setLanguage('en'); + $b->setStatusCode(\Drupal::config('redirect.settings')->get('default_status_code')); + $b->save(); + $this->cleanupEntities[] = $b; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $resolved = $service->resolveRedirectTarget('/' . $loopA . '?x=1#frag'); + + $this->assertFalse($resolved['changed']); + } + + /** + * Tests external URLs are ignored. + */ + public function testExternalUrlIsIgnored(): void { + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + + $resolved = $service->resolveRedirectTarget('https://example.com/somewhere'); + $this->assertFalse($resolved['changed']); + + $text = '

External

'; + $normalized = $service->normalizeRedirectLinksInText($text); + $this->assertFalse($normalized['changed']); + $this->assertStringContainsString('https://example.com/somewhere', $normalized['text']); + } + + /** + * Tests non-redirect local links remain unchanged. + */ + public function testNonRedirectLocalLinkRemainsUnchanged(): void { + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + + $resolved = $service->resolveRedirectTarget('/this-path-does-not-redirect'); + $this->assertFalse($resolved['changed']); + + $uriNormalized = $service->normalizeRedirectLinkUri('internal:/this-path-does-not-redirect'); + $this->assertFalse($uriNormalized['changed']); + $this->assertSame('internal:/this-path-does-not-redirect', $uriNormalized['uri']); + } + + /** + * Tests max-depth limit prevents over-following deep chains. + */ + public function testResolveRedirectTargetRespectsMaxDepth(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + + $p1 = 'depth-a-' . $this->randomMachineName(); + $p2 = 'depth-b-' . $this->randomMachineName(); + $p3 = 'depth-c-' . $this->randomMachineName(); + + $r1 = Redirect::create(); + $r1->setSource($p1); + $r1->setRedirect('/' . $p2); + $r1->setLanguage('en'); + $r1->setStatusCode(301); + $r1->save(); + $this->cleanupEntities[] = $r1; + + $r2 = Redirect::create(); + $r2->setSource($p2); + $r2->setRedirect('/' . $p3); + $r2->setLanguage('en'); + $r2->setStatusCode(301); + $r2->save(); + $this->cleanupEntities[] = $r2; + + $r3 = Redirect::create(); + $r3->setSource($p3); + $r3->setRedirect('node/' . 
$target->id()); + $r3->setLanguage('en'); + $r3->setStatusCode(301); + $r3->save(); + $this->cleanupEntities[] = $r3; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $limited = $service->resolveRedirectTarget('/' . $p1, 1); + $this->assertTrue($limited['changed']); + $this->assertSame('/' . $p2, $limited['target_path']); + + $full = $service->resolveRedirectTarget('/' . $p1, 10); + $this->assertTrue($full['changed']); + $this->assertStringContainsString($target->toUrl()->toString(), $full['target_path']); + } + + /** + * Tests redirecting to external target is ignored for rewriting. + */ + public function testRedirectToExternalTargetIsIgnored(): void { + $source = 'to-external-' . $this->randomMachineName(); + $redirect = Redirect::create(); + $redirect->setSource($source); + $redirect->setRedirect('https://example.com/outside'); + $redirect->setLanguage('en'); + $redirect->setStatusCode(301); + $redirect->save(); + $this->cleanupEntities[] = $redirect; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $resolved = $service->resolveRedirectTarget('/' . $source); + $this->assertFalse($resolved['changed']); + } + + /** + * Tests alias-like final target rewrites href but does not add node metadata. + */ + public function testAliasTargetWithoutNodeDoesNotAddEntityMetadata(): void { + $source = 'to-alias-' . $this->randomMachineName(); + $redirect = Redirect::create(); + $redirect->setSource($source); + $redirect->setRedirect('/some/non-node-alias'); + $redirect->setLanguage('en'); + $redirect->setStatusCode(301); + $redirect->save(); + $this->cleanupEntities[] = $redirect; + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $text = '

Alias link

'; + $normalized = $service->normalizeRedirectLinksInText($text); + + $this->assertTrue($normalized['changed']); + $this->assertStringContainsString('/some/non-node-alias', $normalized['text']); + $this->assertStringNotContainsString('data-entity-type="node"', $normalized['text']); + $this->assertStringNotContainsString('data-entity-uuid=', $normalized['text']); + } + + /** + * Tests manager idempotency after first normalization. + */ + public function testManagerIsIdempotentAfterNormalization(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $node = $this->createNode([ + 'type' => 'page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'body' => [ + 'value' => '

No redirect yet

', + 'format' => 'full_html', + ], + ]); + $node->set('body', [ + 'value' => '

Run twice

', + 'format' => 'full_html', + ]); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ + $manager = \Drupal::service('mass_redirect_normalizer.manager'); + $first = $manager->normalizeEntity($node, TRUE); + $this->assertTrue($first['changed']); + + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $second = $manager->normalizeEntity($reloaded, TRUE); + $this->assertFalse($second['changed']); + } + + /** + * Tests command options constrain output by entity type and bundle. + */ + public function testCommandOptionsEntityTypeAndBundleFiltering(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $page = $this->createNode([ + 'type' => 'page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'body' => [ + 'value' => '

Node-only

', + 'format' => 'full_html', + ], + ]); + + $command = new MassRedirectNormalizerCommands( + \Drupal::entityTypeManager(), + \Drupal::service('mass_redirect_normalizer.manager') + ); + $rowsObj = $command->normalizeRedirectLinks([ + 'entity-type' => 'node', + 'bundle' => 'page', + 'limit' => 0, + 'show-unchanged' => TRUE, + ]); + $rows = method_exists($rowsObj, 'getArrayCopy') ? $rowsObj->getArrayCopy() : iterator_to_array($rowsObj); + + $this->assertNotEmpty($rows); + $nonSummaryRows = array_filter($rows, fn($row) => ($row['status'] ?? '') !== 'summary'); + $this->assertNotEmpty($nonSummaryRows); + foreach ($nonSummaryRows as $row) { + $this->assertSame('node', $row['entity_type']); + $this->assertSame('page', $row['bundle']); + } + } + + /** + * Tests absolute local URL link-field normalization. + */ + public function testNormalizeAbsoluteLocalUrlLinkField(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkResolver $service */ + $service = \Drupal::service('mass_redirect_normalizer.resolver'); + $normalized = $service->normalizeRedirectLinkUri('https://www.mass.gov/' . $sourceStart . '?q=1#x'); + + $this->assertTrue($normalized['changed']); + $this->assertStringStartsWith('internal:/', $normalized['uri']); + $this->assertStringContainsString($target->toUrl()->toString(), $normalized['uri']); + $this->assertStringContainsString('?q=1', $normalized['uri']); + $this->assertStringContainsString('#x', $normalized['uri']); + } + + /** + * Tests mixed multi-value link field normalization on one entity. 
+ */ + public function testManagerNormalizesOnlyRedirectingLinksInMultiValueField(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $node = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'field_social_links' => [ + ['uri' => 'internal:/no-redirect-here', 'title' => 'unchanged-local'], + ], + ]); + $node->set('field_social_links', [ + ['uri' => 'internal:/' . $sourceStart, 'title' => 'redirecting'], + ['uri' => 'internal:/no-redirect-here', 'title' => 'unchanged-local'], + ['uri' => 'https://example.com/external', 'title' => 'external'], + ]); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ + $manager = \Drupal::service('mass_redirect_normalizer.manager'); + $result = $manager->normalizeEntity($node, TRUE); + $this->assertTrue($result['changed']); + + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $links = $reloaded->get('field_social_links')->getValue(); + + $this->assertStringContainsString($target->toUrl()->toString(), $links[0]['uri']); + $this->assertSame('internal:/no-redirect-here', $links[1]['uri']); + $this->assertSame('https://example.com/external', $links[2]['uri']); + } + + /** + * Tests link item metadata (title/options) is preserved. 
+ */ + public function testLinkItemMetadataIsPreservedDuringNormalization(): void { + $target = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + ]); + [$sourceStart] = $this->createRedirectChain($target); + + $options = [ + 'attributes' => [ + 'class' => ['my-link-class'], + ], + ]; + $node = $this->createNode([ + 'type' => 'org_page', + 'title' => $this->randomMachineName(), + 'status' => 1, + 'moderation_state' => 'published', + 'field_social_links' => [ + [ + 'uri' => 'internal:/no-redirect-yet', + 'title' => 'initial-title', + ], + ], + ]); + $node->set('field_social_links', [ + [ + 'uri' => 'internal:/' . $sourceStart, + 'title' => 'keep-title', + 'options' => $options, + ], + ]); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ + $manager = \Drupal::service('mass_redirect_normalizer.manager'); + $result = $manager->normalizeEntity($node, TRUE); + $this->assertTrue($result['changed']); + + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $item = $reloaded->get('field_social_links')->first(); + $this->assertNotNull($item); + $this->assertSame('keep-title', $item->title); + $this->assertSame('my-link-class', $item->options['attributes']['class'][0]); + $this->assertStringContainsString($target->toUrl()->toString(), $item->uri); + } + +} + From f93494becb8b60f86c8452b46fc8c6ff1e68e63b Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Thu, 2 Apr 2026 14:27:17 +0400 Subject: [PATCH 2/7] PHPCS --- .../mass_redirect_normalizer/mass_redirect_normalizer.module | 1 - .../src/Drush/Commands/MassRedirectNormalizerCommands.php | 1 - .../src/RedirectLinkNormalizationManager.php | 1 - .../custom/mass_redirect_normalizer/src/RedirectLinkResolver.php | 1 - .../tests/src/ExistingSite/RedirectLinkNormalizationTest.php | 1 - 5 files changed, 5 deletions(-) diff --git 
a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module index 73926a9569..416e791e7b 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module @@ -17,4 +17,3 @@ function mass_redirect_normalizer_entity_presave(EntityInterface $entity) { // In presave we mutate field values only; parent entity handles persistence. $manager->normalizeEntity($entity, FALSE); } - diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php index 919a9c0a88..c894b69e14 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php @@ -134,4 +134,3 @@ public function normalizeRedirectLinks($options = ['limit' => 0, 'entity-type' = } } - diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php index 50e573c746..eec535e759 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -98,4 +98,3 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU } } - diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php index 890eddb4e8..b0cceb737b 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php +++ 
b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php @@ -205,4 +205,3 @@ private function loadRedirectBySourcePath(string $sourcePath): ?Redirect { } } - diff --git a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php index 7a021b4ab2..f5e6618975 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php +++ b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php @@ -482,4 +482,3 @@ public function testLinkItemMetadataIsPreservedDuringNormalization(): void { } } - From 338dd0aa401a313eaf91523cfba1041120404aa4 Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Thu, 2 Apr 2026 14:28:17 +0400 Subject: [PATCH 3/7] Changelog --- changelogs/DP-45831.yml | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 changelogs/DP-45831.yml diff --git a/changelogs/DP-45831.yml b/changelogs/DP-45831.yml new file mode 100644 index 0000000000..ce2212ad7a --- /dev/null +++ b/changelogs/DP-45831.yml @@ -0,0 +1,41 @@ +# +# Write your changelog entry here. Every pull request must have a changelog yml file. +# +# Change types: +# ############################################################################# +# You can use one of the following types: +# - Added: For new features. +# - Changed: For changes to existing functionality. +# - Deprecated: For soon-to-be removed features. +# - Removed: For removed features. +# - Fixed: For any bug fixes. +# - Security: In case of vulnerabilities. +# +# Format +# ############################################################################# +# The format is crucial. Please follow the examples below. 
For reference, the requirements are: +# - All 3 parts are required and you must include "Type", "description" and "issue". +# - "Type" must be left aligned and followed by a colon. +# - "description" must be indented with 2 spaces followed by a colon +# - "issue" must be indented with 4 spaces followed by a colon. +# - "issue" is for the Jira ticket number only e.g. DP-1234 +# - No extra spaces, indents, or blank lines are allowed. +# +# Example: +# ############################################################################# +# Fixed: +# - description: Fixes scrolling on edit pages in Safari. +# issue: DP-13314 +# +# You may add more than 1 description & issue for each type using the following format: +# Changed: +# - description: Automating the release branch. +# issue: DP-10166 +# - description: Second change item that needs a description. +# issue: DP-19875 +# - description: Third change item that needs a description along with an issue. +# issue: DP-19843 +# +Changed: + - description: Update internal links on mass.gov that are redirects. 
+ issue: DP-45831 From 92c782354e03fec88162d122e0adc28dc760934e Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Thu, 2 Apr 2026 14:33:08 +0400 Subject: [PATCH 4/7] Change --- .../src/RedirectLinkNormalizationManager.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php index eec535e759..b903e4554a 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -4,6 +4,8 @@ use Drupal\Component\Datetime\TimeInterface; use Drupal\Core\Entity\ContentEntityInterface; +use Drupal\Core\Entity\RevisionLogInterface; +use Drupal\Core\Entity\RevisionableInterface; use Drupal\mayflower\Helper; use Drupal\paragraphs\Entity\Paragraph; @@ -70,14 +72,12 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU return ['changed' => $changed, 'skipped' => FALSE]; } - if (method_exists($entity, 'setNewRevision')) { - call_user_func([$entity, 'setNewRevision']); + if ($entity instanceof RevisionableInterface) { + $entity->setNewRevision(); } - if (method_exists($entity, 'setRevisionLogMessage')) { - call_user_func([$entity, 'setRevisionLogMessage'], 'Revision created to normalize redirected internal links.'); - } - if (method_exists($entity, 'setRevisionCreationTime')) { - call_user_func([$entity, 'setRevisionCreationTime'], $this->time->getRequestTime()); + if ($entity instanceof RevisionLogInterface) { + $entity->setRevisionLogMessage('Revision created to normalize redirected internal links.'); + $entity->setRevisionCreationTime($this->time->getRequestTime()); } $entity->save(); From f94bb7744d661f2cf2f23b122323d42a7f1bc22c Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Thu, 2 Apr 2026 14:36:03 
+0400 Subject: [PATCH 5/7] Change --- .../src/RedirectLinkNormalizationManager.php | 85 ++++++++++++------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php index b903e4554a..703bf51461 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -13,6 +13,8 @@ * Normalizes redirected internal links on content entities. */ class RedirectLinkNormalizationManager { + private const REVISION_MESSAGE = 'Revision created to normalize redirected internal links.'; + private const NESTED_REVISION_MESSAGE = 'Revision created to normalize redirected internal links in nested content.'; /** * Constructs the manager. @@ -44,26 +46,12 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU $fieldType = $field->getFieldDefinition()->getType(); if (in_array($fieldType, ['text_long', 'text_with_summary', 'string_long'], TRUE)) { foreach ($field as $item) { - if (!isset($item->value) || $item->value === NULL || $item->value === '') { - continue; - } - $processed = $this->resolver->normalizeRedirectLinksInText($item->value); - if ($processed['changed']) { - $item->value = $processed['text']; - $changed = TRUE; - } + $changed = $this->normalizeTextItem($item, $changed); } } elseif ($fieldType === 'link') { foreach ($field as $item) { - if (empty($item->uri)) { - continue; - } - $processed = $this->resolver->normalizeRedirectLinkUri($item->uri); - if ($processed['changed']) { - $item->uri = $processed['uri']; - $changed = TRUE; - } + $changed = $this->normalizeLinkItem($item, $changed); } } } @@ -72,29 +60,62 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU return ['changed' => $changed, 'skipped' 
=> FALSE]; } - if ($entity instanceof RevisionableInterface) { - $entity->setNewRevision(); - } - if ($entity instanceof RevisionLogInterface) { - $entity->setRevisionLogMessage('Revision created to normalize redirected internal links.'); - $entity->setRevisionCreationTime($this->time->getRequestTime()); - } + $this->prepareRevision($entity, self::REVISION_MESSAGE); $entity->save(); if ($entity->getEntityTypeId() === 'paragraph' && $node = Helper::getParentNode($entity)) { - if (method_exists($node, 'setNewRevision')) { - $node->setNewRevision(); - } - if (method_exists($node, 'setRevisionLogMessage')) { - $node->setRevisionLogMessage('Revision created to normalize redirected internal links in nested content.'); - } - if (method_exists($node, 'setRevisionCreationTime')) { - $node->setRevisionCreationTime($this->time->getRequestTime()); - } + $this->prepareRevision($node, self::NESTED_REVISION_MESSAGE); $node->save(); } return ['changed' => TRUE, 'skipped' => FALSE]; } + /** + * Normalize a text item value and return updated changed flag. + */ + private function normalizeTextItem(object $item, bool $changed): bool { + if (!isset($item->value) || $item->value === NULL || $item->value === '') { + return $changed; + } + + $processed = $this->resolver->normalizeRedirectLinksInText($item->value); + if ($processed['changed']) { + $item->value = $processed['text']; + return TRUE; + } + + return $changed; + } + + /** + * Normalize a link item URI and return updated changed flag. + */ + private function normalizeLinkItem(object $item, bool $changed): bool { + if (empty($item->uri)) { + return $changed; + } + + $processed = $this->resolver->normalizeRedirectLinkUri($item->uri); + if ($processed['changed']) { + $item->uri = $processed['uri']; + return TRUE; + } + + return $changed; + } + + /** + * Configure revision metadata when supported by entity type. 
+ */ + private function prepareRevision(ContentEntityInterface $entity, string $message): void { + if ($entity instanceof RevisionableInterface) { + $entity->setNewRevision(); + } + if ($entity instanceof RevisionLogInterface) { + $entity->setRevisionLogMessage($message); + $entity->setRevisionCreationTime($this->time->getRequestTime()); + } + } + } From ae8400edc34ae7ecf79ea37ea0931151bfdd58b7 Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Mon, 6 Apr 2026 11:49:35 +0400 Subject: [PATCH 6/7] Change --- .../custom/mass_redirect_normalizer/README.md | 136 +++++++-- .../mass_redirect_normalizer.info.yml | 1 + .../mass_redirect_normalizer.module | 1 - .../MassRedirectNormalizerCommands.php | 262 ++++++++++++++---- .../src/RedirectLinkNormalizationManager.php | 147 ++++++---- .../src/RedirectLinkResolver.php | 13 +- .../RedirectLinkNormalizationTest.php | 38 ++- 7 files changed, 458 insertions(+), 140 deletions(-) diff --git a/docroot/modules/custom/mass_redirect_normalizer/README.md b/docroot/modules/custom/mass_redirect_normalizer/README.md index 3d17b30d03..fe9a073b08 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/README.md +++ b/docroot/modules/custom/mass_redirect_normalizer/README.md @@ -1,30 +1,126 @@ # Redirect Link Normalizer -This module rewrites internal links that currently rely on redirects so they -point to the final destination path directly. For rich text links, the process -also adds `data-entity-*` attributes when the final target resolves to a node. +This module rewrites internal links that still point at **redirect source paths** so they use the **final path** instead. For rich text, when the final target is a node, it also adds `data-entity-*` attributes. 
+ -## Manual execution +The same logic runs in two places: -- Dry run: - - `ddev drush --simulate mass-redirect-normalizer:normalize-links --limit=500` -- Execute: - - `ddev drush mass-redirect-normalizer:normalize-links --limit=5000` -- Optional filters: - - `--entity-type=node|paragraph|all` - - `--bundle=` - - `--show-unchanged` +- **Bulk Drush command** — scan many entities and fix stored values. +- **`hook_entity_presave()`** — when an editor saves a node or paragraph, links are normalized on that save. -## Periodic execution +--- -For one-time/periodic bulk cleanup, run the Drush command above. +## What gets scanned -For ongoing maintenance, this module also normalizes links during entity save -via `hook_entity_presave()` for nodes and paragraphs. This means new edits -automatically store final target paths instead of redirecting paths. +For each **node** or **paragraph**, the code looks at: -## Post-run usage refresh +- Text fields: `text_long`, `text_with_summary`, `string_long` (HTML `href` values inside the markup). +- **Link** fields (`link` type): the stored URI. + +It does **not** change random text; it only rewrites values the resolver treats as redirect-based internal links (see integration tests for examples). + +## More about the code + +- `RedirectLinkResolver`: + - Only link logic. + - It finds the final path and rewrites one text value or one link value. + - It does **not** save entities. +- `RedirectLinkNormalizationManager`: + - Entity workflow logic. + - It loops fields on node/paragraph, calls the resolver, handles dry-run, and saves revisions when needed. + +This split keeps code easier to test and easier to maintain. + +--- + +## Drush command + +| Item | Value | +|------|--------| +| Command | `mass-redirect-normalizer:normalize-links` | +| Alias | `mnrl` | + +### Options + +| Option | Meaning | +|--------|---------| +| `--simulate` | Dry run: **no** database writes. Same idea as global `ddev drush --simulate ...`. 
| +| `--limit=N` | Max entities **per entity type** to load from the query. **`0` = no limit.** When `--entity-type=all`, you get up to **N nodes** and up to **N paragraphs** (two separate caps). | +| `--entity-type=node\|paragraph\|all` | Default **`all`** (nodes and paragraphs). | +| `--bundle=...` | Only that bundle (node type or paragraph type machine name). Still checked after load. | +| `--entity-ids=1,2,3` | Only these IDs. **Requires** `--entity-type=node` or `paragraph` (**not** `all`). Ignores `--limit`. | + +### Default table columns + +| Column | Notes | +|--------|--------| +| Status | `would_update` (simulate) or `updated` (real run). | +| Entity type | `node` or `paragraph`. | +| Entity ID | Entity id. | +| Parent node ID | For **paragraphs**, the host node id from `Helper::getParentNode()`. For nodes, `-`. | +| Bundle | Bundle / type machine name. | +| URL before / URL after | This is just the link value, not full HTML. For link fields, it shows the stored path/URL. For text fields, it shows only links that changed (`href`). If many links changed in one field, they are joined with `; `. If the value is too long, CLI shortens it. | + +### What the command skips + +- **Orphan paragraphs** — paragraphs that are not attached to real host content (`Helper::isParagraphOrphan()`). They are **not** processed and **do not** appear as rows. +- Entities with **no** redirect-based links to fix produce **no** rows (empty table is normal). + +### Simulate, then run, then verify (manual QA) + +1. **Preview:** + `ddev drush mass-redirect-normalizer:normalize-links --simulate --limit=100` +2. **Apply:** + `ddev drush mass-redirect-normalizer:normalize-links --limit=100` +3. **Re-check:** run **simulate** again with the same filters. Items that were fixed should **not** show `would_update` anymore (unless something else changed them back). 
+ +For a narrow retest after you know specific IDs: -For large backfills, regenerate entity usage to refresh orphan reports: +`ddev drush mass-redirect-normalizer:normalize-links --simulate --entity-type=paragraph --entity-ids=123,456` + +### Important detail about saved content + +On **first save**, `hook_entity_presave()` may already rewrite links in the stored field values. So if you create test content in the UI and then expect the bulk command to “see” the old redirect URL in the database, it might already be normalized. The automated tests handle that case where needed. + +--- + +## Automated tests + +Existing-site integration tests live here: + +`docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php` + +Run tests: + +```bash +ddev exec ./vendor/bin/phpunit docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php +``` + +### What is covered + +- Redirect chain resolution (including query and fragment support). +- Rich-text rewriting (`href`) and node metadata attributes (`data-entity-*`). +- Link field URI normalization (`internal:/...` and absolute local mass.gov URLs). +- Redirect loops and max-depth behavior (no infinite follow, expected stop point). +- External URL behavior (ignored; no rewrite). +- Alias-like non-node targets (rewrite link, but do not add node metadata). +- Presave normalization path for nodes (`hook_entity_presave()` behavior). +- Manager behavior: + - Running it twice gives the same result (first run fixes links, second run has nothing new to fix). + - Multi-value link field handling (only redirecting values change). + - Link item metadata preservation (`title`, `options`). +- Drush command behavior: + - Entity type and bundle filters. + - Targeted runs with `--entity-ids`. + - Simulate mode row output (`would_update`) and URL before/after columns. 
+ +--- + +## Periodic / bulk cleanup + +Use the Drush command above for one-off or scheduled bulk runs. + +--- + +## Post-run usage refresh -- `ddev drush mass-content:usage-regenerate --batch-size=1000` +For large backfills, regenerate entity usage so usage reports stay accurate. diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml index 362c28d93b..77e87895cb 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.info.yml @@ -6,4 +6,5 @@ package: Custom dependencies: - mass_fields:mass_fields - mass_content:mass_content + - mayflower:mayflower diff --git a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module index 416e791e7b..85bc4f9f37 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module +++ b/docroot/modules/custom/mass_redirect_normalizer/mass_redirect_normalizer.module @@ -14,6 +14,5 @@ function mass_redirect_normalizer_entity_presave(EntityInterface $entity) { /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ $manager = \Drupal::service('mass_redirect_normalizer.manager'); - // In presave we mutate field values only; parent entity handles persistence. 
$manager->normalizeEntity($entity, FALSE); } diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php index c894b69e14..e71e87bcef 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/Drush/Commands/MassRedirectNormalizerCommands.php @@ -3,14 +3,18 @@ namespace Drupal\mass_redirect_normalizer\Drush\Commands; use Consolidation\OutputFormatters\StructuredData\RowsOfFields; +use Drupal\Component\Utility\Html; +use Drupal\Component\Utility\Unicode; use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager; +use Drupal\mayflower\Helper; +use Drupal\paragraphs\Entity\Paragraph; use Drush\Commands\AutowireTrait; use Drush\Commands\DrushCommands; use Drush\Drush; /** - * Drush commands for redirect link normalization. + * Drush command for redirect link normalization. */ final class MassRedirectNormalizerCommands extends DrushCommands { @@ -24,38 +28,63 @@ public function __construct( } /** - * Normalizes redirected internal links in node and paragraph content. + * Normalizes redirect-based links in nodes and paragraphs. + * + * Use --simulate (or global `drush --simulate`) to preview changes only. + * Without simulation, changes are saved. 
* * @command mass-redirect-normalizer:normalize-links * @field-labels * status: Status * entity_type: Entity Type * entity_id: Entity ID + * parent_node_id: Parent Node ID * bundle: Bundle + * field: Field + * delta: Delta + * kind: Kind + * before: URL before + * after: URL after * details: Details - * @default-fields status,entity_type,entity_id,bundle,details + * @default-fields status,entity_type,entity_id,parent_node_id,bundle,field,before,after * @aliases mnrl - * @option limit - * @option entity-type - * @option bundle - * @option show-unchanged + * @option limit Max entities per entity type (0 = no limit). + * @option entity-type Entity type: node, paragraph, or all (default). + * @option bundle Limit to this bundle / paragraph type. + * @option entity-ids Comma-separated IDs to process only (requires + * --entity-type=node or paragraph, not all). Ignores --limit. + * @option simulate Dry-run: show diffs only; do not save (same as global `drush --simulate`). + * @usage mass-redirect-normalizer:normalize-links --simulate --limit=100 + * Preview changes. Use --format=json for machine-readable output. */ - public function normalizeRedirectLinks($options = ['limit' => 0, 'entity-type' => 'all', 'bundle' => NULL, 'show-unchanged' => FALSE]): RowsOfFields { + public function normalizeRedirectLinks( + $options = [ + 'limit' => 0, + 'entity-type' => 'all', + 'bundle' => NULL, + 'entity-ids' => NULL, + 'simulate' => FALSE, + ], + ): RowsOfFields { $_ENV['MASS_FLAGGING_BYPASS'] = TRUE; $entityTypes = $options['entity-type'] === 'all' ? ['node', 'paragraph'] : [(string) $options['entity-type']]; $limit = max(0, (int) $options['limit']); - $showUnchanged = !empty($options['show-unchanged']); + $entityIdsOption = isset($options['entity-ids']) ? 
trim((string) $options['entity-ids']) : ''; try { - $simulate = Drush::simulate(); + $simulate = !empty($options['simulate']) || Drush::simulate(); } catch (\RuntimeException) { - // Allow direct invocation in PHPUnit without Drush bootstrap. - $simulate = FALSE; + // Allow PHPUnit to call this command without full Drush bootstrap. + $simulate = !empty($options['simulate']); } $rows = []; $processed = 0; - $changed = 0; - $skipped = 0; + $entitiesChanged = 0; + $fieldChanges = 0; + + if ($entityIdsOption !== '' && $options['entity-type'] === 'all') { + throw new \InvalidArgumentException('The --entity-ids option requires --entity-type=node or --entity-type=paragraph.'); + } foreach ($entityTypes as $entityType) { if (!in_array($entityType, ['node', 'paragraph'], TRUE)) { @@ -63,23 +92,34 @@ public function normalizeRedirectLinks($options = ['limit' => 0, 'entity-type' = 'status' => 'unsupported', 'entity_type' => $entityType, 'entity_id' => 'N/A', + 'parent_node_id' => '-', 'bundle' => 'N/A', + 'field' => '-', + 'delta' => '-', + 'kind' => '-', + 'before' => '-', + 'after' => '-', 'details' => 'Unsupported entity type', ]; continue; } - $idField = $entityType === 'node' ? 'nid' : 'id'; - $query = $this->entityTypeManager->getStorage($entityType)->getQuery() - ->accessCheck(FALSE) - ->sort($idField); - if (!empty($options['bundle'])) { - $query->condition('type', $options['bundle']); + if ($entityIdsOption !== '') { + $ids = array_values(array_filter(array_map('intval', preg_split('/\s*,\s*/', $entityIdsOption)))); } - if ($limit > 0) { - $query->range(0, $limit); + else { + $idField = $entityType === 'node' ? 
'nid' : 'id'; + $query = $this->entityTypeManager->getStorage($entityType)->getQuery() + ->accessCheck(FALSE) + ->sort($idField); + if (!empty($options['bundle'])) { + $query->condition('type', $options['bundle']); + } + if ($limit > 0) { + $query->range(0, $limit); + } + $ids = $query->execute(); } - $ids = $query->execute(); foreach ($ids as $id) { $entity = $this->entityTypeManager->getStorage($entityType)->load($id); @@ -87,50 +127,156 @@ public function normalizeRedirectLinks($options = ['limit' => 0, 'entity-type' = continue; } - $result = $this->normalizerManager->normalizeEntity($entity, !$simulate); - $processed++; - if (!empty($result['changed'])) { - $changed++; - $rows[] = [ - 'status' => $simulate ? 'would_update' : 'updated', - 'entity_type' => $entityType, - 'entity_id' => $id, - 'bundle' => $entity->bundle(), - 'details' => 'Redirect-based links normalized', - ]; + if (!empty($options['bundle']) && $entity->bundle() !== $options['bundle']) { + continue; } - elseif (!empty($result['skipped'])) { - $skipped++; - $rows[] = [ - 'status' => 'skipped', - 'entity_type' => $entityType, - 'entity_id' => $id, - 'bundle' => $entity->bundle(), - 'details' => 'Orphan paragraph skipped', - ]; + + // Skip orphan paragraphs. + if (Helper::isParagraphOrphan($entity)) { + continue; } - elseif ($showUnchanged) { - $rows[] = [ - 'status' => 'unchanged', - 'entity_type' => $entityType, - 'entity_id' => $id, - 'bundle' => $entity->bundle(), - 'details' => 'No redirect-based links found', - ]; + + $result = $this->normalizerManager->normalizeEntity($entity, !$simulate, $simulate); + $processed++; + if (!empty($result['changed'])) { + $entitiesChanged++; + $changes = $result['changes'] ?? []; + $fieldChanges += count($changes); + $parentNodeId = '-'; + if ($entityType === 'paragraph' && $entity instanceof Paragraph) { + $parentNode = Helper::getParentNode($entity); + $parentNodeId = $parentNode ? 
(string) $parentNode->id() : '-'; + } + foreach ($changes as $change) { + [$beforePreview, $afterPreview] = $this->buildUrlBeforeAfter( + (string) $change['kind'], + (string) $change['before'], + (string) $change['after'], + ); + $rows[] = [ + 'status' => $simulate ? 'would_update' : 'updated', + 'entity_type' => $entityType, + 'entity_id' => $id, + 'parent_node_id' => $parentNodeId, + 'bundle' => $entity->bundle(), + 'field' => $change['field'], + 'delta' => (string) $change['delta'], + 'kind' => $change['kind'], + 'before' => $beforePreview, + 'after' => $afterPreview, + 'details' => $simulate ? 'dry-run' : 'saved', + ]; + } } } } $mode = $simulate ? 'SIMULATION' : 'EXECUTION'; - $rows[] = [ - 'status' => 'summary', - 'entity_type' => 'all', - 'entity_id' => (string) $processed, - 'bundle' => 'N/A', - 'details' => "{$mode} complete. changed={$changed}; skipped={$skipped}", - ]; + if ($this->logger()) { + $this->logger()->notice((string) dt('@mode: scanned @count entities; updated: @updated; field changes: @diffs.', [ + '@mode' => $mode, + '@count' => $processed, + '@updated' => $entitiesChanged, + '@diffs' => $fieldChanges, + ])); + } return new RowsOfFields($rows); } + /** + * Builds URL-only before/after values for table output. + * + * Link fields show a readable URI/path. + * Text fields show changed href values in document order. + */ + private function buildUrlBeforeAfter(string $kind, string $before, string $after): array { + $max = 120; + if ($kind === 'link') { + return [ + $this->truncateForTable($this->formatUriForDisplay($before), $max), + $this->truncateForTable($this->formatUriForDisplay($after), $max), + ]; + } + + $beforeHrefs = $this->extractAnchorHrefs($before); + $afterHrefs = $this->extractAnchorHrefs($after); + $pairs = []; + $count = max(count($beforeHrefs), count($afterHrefs)); + for ($i = 0; $i < $count; $i++) { + $b = $beforeHrefs[$i] ?? ''; + $a = $afterHrefs[$i] ?? 
''; + if ($b !== $a) { + $pairs[] = [$b, $a]; + } + } + + if ($pairs === []) { + if ($beforeHrefs !== [] || $afterHrefs !== []) { + return [ + $this->truncateForTable($beforeHrefs[0] ?? '-', $max), + $this->truncateForTable($afterHrefs[0] ?? '-', $max), + ]; + } + return ['-', '-']; + } + + $beforeUrls = implode('; ', array_column($pairs, 0)); + $afterUrls = implode('; ', array_column($pairs, 1)); + return [ + $this->truncateForTable($beforeUrls, $max), + $this->truncateForTable($afterUrls, $max), + ]; + } + + /** + * Lists href attribute values for anchors in document order. + * + * @return array + * A list of href strings. + */ + private function extractAnchorHrefs(string $html): array { + if ($html === '') { + return []; + } + $dom = Html::load($html); + $xpath = new \DOMXPath($dom); + $hrefs = []; + foreach ($xpath->query('//a[@href]') as $anchor) { + if ($anchor instanceof \DOMElement) { + $hrefs[] = (string) $anchor->getAttribute('href'); + } + } + return $hrefs; + } + + /** + * Formats link-field URIs for CLI output. + */ + private function formatUriForDisplay(string $uri): string { + $uri = trim($uri); + if ($uri === '') { + return '-'; + } + if (str_starts_with($uri, 'internal:')) { + $rest = substr($uri, strlen('internal:')); + $path = (string) parse_url($rest, PHP_URL_PATH); + $query = (string) parse_url($rest, PHP_URL_QUERY); + $fragment = (string) parse_url($rest, PHP_URL_FRAGMENT); + $out = ($path !== '' ? $path : '/') . ($query !== '' ? '?' . $query : '') . ($fragment !== '' ? '#' . $fragment : ''); + return $out !== '' ? $out : $uri; + } + return $uri; + } + + /** + * Shortens long values for the CLI table. 
+ */ + private function truncateForTable(string $text, int $max = 72): string { + if (mb_strlen($text) <= $max) { + return $text; + } + return Unicode::truncate($text, $max, FALSE, TRUE); + } + } diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php index 703bf51461..fcbbc679ef 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkNormalizationManager.php @@ -10,14 +10,17 @@ use Drupal\paragraphs\Entity\Paragraph; /** - * Normalizes redirected internal links on content entities. + * Manager class: entity processing and save flow. + * + * This class loops entity fields, calls the resolver, and decides if/when + * to save revisions. The resolver class only handles link rewrite logic. */ class RedirectLinkNormalizationManager { private const REVISION_MESSAGE = 'Revision created to normalize redirected internal links.'; private const NESTED_REVISION_MESSAGE = 'Revision created to normalize redirected internal links in nested content.'; /** - * Constructs the manager. + * Creates the manager. */ public function __construct( protected RedirectLinkResolver $resolver, @@ -26,38 +29,44 @@ public function __construct( } /** - * Processes redirect-based links in an entity. + * Normalizes redirect-based links on one entity. * * @param \Drupal\Core\Entity\ContentEntityInterface $entity * Node or paragraph entity. * @param bool $save - * Whether to persist updates. + * If TRUE, save changes and create revisions when possible. + * @param bool $dryRun + * If TRUE, only collect changes and do not write values. * * @return array - * Processing result. + * Result with keys: changed, skipped, and changes. 
*/ - public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRUE): array { + public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRUE, bool $dryRun = FALSE): array { if ($entity instanceof Paragraph && Helper::isParagraphOrphan($entity)) { - return ['changed' => FALSE, 'skipped' => TRUE]; + return ['changed' => FALSE, 'skipped' => TRUE, 'changes' => []]; } - $changed = FALSE; - foreach ($entity->getFields() as $field) { - $fieldType = $field->getFieldDefinition()->getType(); - if (in_array($fieldType, ['text_long', 'text_with_summary', 'string_long'], TRUE)) { - foreach ($field as $item) { - $changed = $this->normalizeTextItem($item, $changed); - } - } - elseif ($fieldType === 'link') { - foreach ($field as $item) { - $changed = $this->normalizeLinkItem($item, $changed); - } - } + $apply = !$dryRun; + $result = $this->collectFieldNormalizations($entity, $apply); + + if (!$result['changed']) { + return ['changed' => FALSE, 'skipped' => FALSE, 'changes' => []]; + } + + if ($dryRun) { + return [ + 'changed' => TRUE, + 'skipped' => FALSE, + 'changes' => $result['changes'], + ]; } - if (!$changed || !$save) { - return ['changed' => $changed, 'skipped' => FALSE]; + if (!$save) { + return [ + 'changed' => TRUE, + 'skipped' => FALSE, + 'changes' => $result['changes'], + ]; } $this->prepareRevision($entity, self::REVISION_MESSAGE); @@ -68,45 +77,81 @@ public function normalizeEntity(ContentEntityInterface $entity, bool $save = TRU $node->save(); } - return ['changed' => TRUE, 'skipped' => FALSE]; - } - - /** - * Normalize a text item value and return updated changed flag. 
- */ - private function normalizeTextItem(object $item, bool $changed): bool { - if (!isset($item->value) || $item->value === NULL || $item->value === '') { - return $changed; - } - - $processed = $this->resolver->normalizeRedirectLinksInText($item->value); - if ($processed['changed']) { - $item->value = $processed['text']; - return TRUE; - } - - return $changed; + return [ + 'changed' => TRUE, + 'skipped' => FALSE, + 'changes' => $result['changes'], + ]; } /** - * Normalize a link item URI and return updated changed flag. + * Scans text and link fields and updates values when needed. + * + * @return array + * An array with keys: + * - changed (bool): TRUE when at least one value changed. + * - changes (array): List of changed items (field, delta, kind, before, + * after). */ - private function normalizeLinkItem(object $item, bool $changed): bool { - if (empty($item->uri)) { - return $changed; - } + private function collectFieldNormalizations(ContentEntityInterface $entity, bool $apply): array { + $changed = FALSE; + $changes = []; - $processed = $this->resolver->normalizeRedirectLinkUri($item->uri); - if ($processed['changed']) { - $item->uri = $processed['uri']; - return TRUE; + foreach ($entity->getFields() as $fieldName => $field) { + $fieldType = $field->getFieldDefinition()->getType(); + if (in_array($fieldType, ['text_long', 'text_with_summary', 'string_long'], TRUE)) { + foreach ($field as $delta => $item) { + if (!isset($item->value) || $item->value === NULL || $item->value === '') { + continue; + } + $before = (string) $item->value; + $processed = $this->resolver->normalizeRedirectLinksInText($before); + if (!$processed['changed']) { + continue; + } + $changed = TRUE; + $changes[] = [ + 'field' => (string) $fieldName, + 'delta' => (int) $delta, + 'kind' => 'text', + 'before' => $before, + 'after' => $processed['text'], + ]; + if ($apply) { + $item->value = $processed['text']; + } + } + } + elseif ($fieldType === 'link') { + foreach ($field as $delta => 
$item) { + if (empty($item->uri)) { + continue; + } + $before = (string) $item->uri; + $processed = $this->resolver->normalizeRedirectLinkUri($before); + if (!$processed['changed']) { + continue; + } + $changed = TRUE; + $changes[] = [ + 'field' => (string) $fieldName, + 'delta' => (int) $delta, + 'kind' => 'link', + 'before' => $before, + 'after' => $processed['uri'], + ]; + if ($apply) { + $item->uri = $processed['uri']; + } + } + } } - return $changed; + return ['changed' => $changed, 'changes' => $changes]; } /** - * Configure revision metadata when supported by entity type. + * Sets revision data if the entity supports revisions. */ private function prepareRevision(ContentEntityInterface $entity, string $message): void { if ($entity instanceof RevisionableInterface) { diff --git a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php index b0cceb737b..9e6cc4708a 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php +++ b/docroot/modules/custom/mass_redirect_normalizer/src/RedirectLinkResolver.php @@ -11,12 +11,15 @@ use Symfony\Component\Routing\RequestContext; /** - * Resolves and rewrites redirect-based internal links. + * Resolver class: pure link rewrite logic. + * + * This class only answers "what should this link become?". + * It does not loop entity fields and does not save entities. */ class RedirectLinkResolver { /** - * Creates a resolver instance. + * Creates the resolver. */ public function __construct( protected EntityTypeManagerInterface $entityTypeManager, @@ -27,7 +30,7 @@ public function __construct( } /** - * Normalizes redirected internal links in rich text. + * Rewrites redirect-based links in rich text. 
*/ public function normalizeRedirectLinksInText(string $text): array { $dom = Html::load($text); @@ -60,7 +63,7 @@ public function normalizeRedirectLinksInText(string $text): array { } /** - * Normalizes redirected internal links in link fields. + * Rewrites redirect-based links in link fields. */ public function normalizeRedirectLinkUri(string $uri): array { $resolved = $this->resolveRedirectTarget($uri); @@ -78,7 +81,7 @@ public function normalizeRedirectLinkUri(string $uri): array { } /** - * Resolves an internal URL/path through redirect chains. + * Follows redirect chain and returns the final local path. */ public function resolveRedirectTarget(string $url, int $maxDepth = 10): array { $parsed = parse_url($url) ?: []; diff --git a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php index f5e6618975..d2095ad41c 100644 --- a/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php +++ b/docroot/modules/custom/mass_redirect_normalizer/tests/src/ExistingSite/RedirectLinkNormalizationTest.php @@ -49,7 +49,7 @@ public function testRedirectChainNormalizationInText(): void { 'status' => 1, 'moderation_state' => 'published', ]); - [$sourceStart, $sourceFinal] = $this->createRedirectChain($target); + [$sourceStart] = $this->createRedirectChain($target); $redirectStorage = \Drupal::entityTypeManager()->getStorage('redirect'); $matching = $redirectStorage->loadByProperties([ @@ -347,6 +347,29 @@ public function testCommandOptionsEntityTypeAndBundleFiltering(): void { ], ]); + // Presave hook rewrites redirect links on first save, so the stored body no + // longer contains the redirect path. Put the redirect URL back in the DB so + // the bulk command (which loads from storage) has something to normalize. + $redirect_markup = '

Node-only

'; + $nid = (int) $page->id(); + $vid = (int) $page->getRevisionId(); + $connection = \Drupal::database(); + foreach (['node__body', 'node_revision__body'] as $table) { + $connection->update($table) + ->fields(['body_value' => $redirect_markup]) + ->condition('entity_id', $nid) + ->condition('revision_id', $vid) + ->execute(); + } + \Drupal::entityTypeManager()->getStorage('node')->resetCache([$nid]); + + /** @var \Drupal\mass_redirect_normalizer\RedirectLinkNormalizationManager $manager */ + $manager = \Drupal::service('mass_redirect_normalizer.manager'); + $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($nid); + $this->assertNotNull($reloaded); + $dryPreview = $manager->normalizeEntity($reloaded, FALSE, TRUE); + $this->assertNotEmpty($dryPreview['changed'], 'Dry run should detect redirect-based link in body.'); + $command = new MassRedirectNormalizerCommands( \Drupal::entityTypeManager(), \Drupal::service('mass_redirect_normalizer.manager') @@ -354,17 +377,18 @@ public function testCommandOptionsEntityTypeAndBundleFiltering(): void { $rowsObj = $command->normalizeRedirectLinks([ 'entity-type' => 'node', 'bundle' => 'page', + 'entity-ids' => (string) $page->id(), 'limit' => 0, - 'show-unchanged' => TRUE, + 'simulate' => TRUE, ]); $rows = method_exists($rowsObj, 'getArrayCopy') ? $rowsObj->getArrayCopy() : iterator_to_array($rowsObj); $this->assertNotEmpty($rows); - $nonSummaryRows = array_filter($rows, fn($row) => ($row['status'] ?? 
'') !== 'summary'); - $this->assertNotEmpty($nonSummaryRows); - foreach ($nonSummaryRows as $row) { + foreach ($rows as $row) { $this->assertSame('node', $row['entity_type']); $this->assertSame('page', $row['bundle']); + $this->assertSame('would_update', $row['status']); + $this->assertNotSame($row['before'], $row['after']); } } @@ -424,6 +448,8 @@ public function testManagerNormalizesOnlyRedirectingLinksInMultiValueField(): vo $this->assertTrue($result['changed']); $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $this->assertNotNull($reloaded); + /** @var \Drupal\node\NodeInterface $reloaded */ $links = $reloaded->get('field_social_links')->getValue(); $this->assertStringContainsString($target->toUrl()->toString(), $links[0]['uri']); @@ -474,6 +500,8 @@ public function testLinkItemMetadataIsPreservedDuringNormalization(): void { $this->assertTrue($result['changed']); $reloaded = \Drupal::entityTypeManager()->getStorage('node')->load($node->id()); + $this->assertNotNull($reloaded); + /** @var \Drupal\node\NodeInterface $reloaded */ $item = $reloaded->get('field_social_links')->first(); $this->assertNotNull($item); $this->assertSame('keep-title', $item->title); From dd52562d306a8770244ffeb53e79b62e1b3ba052 Mon Sep 17 00:00:00 2001 From: Arthur Baghdasaryan Date: Tue, 7 Apr 2026 13:33:13 +0400 Subject: [PATCH 7/7] DP-45831: Redirect audit module --- composer.json | 1 + composer.lock | 63 ++++++++++++++++++- conf/drupal/config/core.extension.yml | 2 +- conf/drupal/config/mass_utility.settings.yml | 6 +- .../drupal/config/redirect_audit.settings.yml | 7 +++ 5 files changed, 74 insertions(+), 5 deletions(-) create mode 100644 conf/drupal/config/redirect_audit.settings.yml diff --git a/composer.json b/composer.json index e5835c891e..630830fbee 100644 --- a/composer.json +++ b/composer.json @@ -260,6 +260,7 @@ "drupal/r4032login": "^2.2", "drupal/rabbit_hole": "^1.1", "drupal/redirect": "^1", + "drupal/redirect_audit": "^1.3", 
"drupal/require_on_publish": "^2.0", "drupal/scheduled_transitions": "^2.7", "drupal/schema_metatag": "^3.0", diff --git a/composer.lock b/composer.lock index 50cba7a8a8..a08e83366b 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "6210d5324ba762bdc4e9a2b52bf782bb", + "content-hash": "8c7468a7ab6d75a0dddc179e48526a16", "packages": [ { "name": "akamai-open/edgegrid-auth", @@ -9796,6 +9796,67 @@ "source": "https://git.drupalcode.org/project/redirect" } }, + { + "name": "drupal/redirect_audit", + "version": "1.3.0", + "source": { + "type": "git", + "url": "https://git.drupalcode.org/project/redirect_audit.git", + "reference": "1.3.0" + }, + "dist": { + "type": "zip", + "url": "https://ftp.drupal.org/files/projects/redirect_audit-1.3.0.zip", + "reference": "1.3.0", + "shasum": "7d97642152121ca2a8189265a860c177254daf89" + }, + "require": { + "drupal/core": "^10 || ^11", + "drupal/redirect": "^1.0" + }, + "type": "drupal-module", + "extra": { + "drupal": { + "version": "1.3.0", + "datestamp": "1771940051", + "security-coverage": { + "status": "covered", + "message": "Covered by Drupal's security advisory policy" + } + } + }, + "notification-url": "https://packages.drupal.org/8/downloads", + "license": [ + "GPL-2.0-or-later" + ], + "authors": [ + { + "name": "Antonio Nuñez", + "homepage": "https://www.drupal.org/u/antonio-nunez", + "role": "Maintainer" + }, + { + "name": "lpeidro", + "homepage": "https://www.drupal.org/user/3372326" + }, + { + "name": "tunic", + "homepage": "https://www.drupal.org/user/397132" + } + ], + "description": "Provides auditing and resolution functions for redirects, detecting chains and loops.", + "homepage": "https://www.drupal.org/project/redirect_audit", + "keywords": [ + "Audit", + "Drupal", + "Redirect", + "SEO" + ], + "support": { + "source": 
"https://git.drupalcode.org/project/redirect_audit", + "issues": "https://www.drupal.org/project/issues/redirect_audit" + } + }, { "name": "drupal/require_on_publish", "version": "2.0.0", diff --git a/conf/drupal/config/core.extension.yml b/conf/drupal/config/core.extension.yml index 2648253e49..a0e707d1ac 100644 --- a/conf/drupal/config/core.extension.yml +++ b/conf/drupal/config/core.extension.yml @@ -145,7 +145,6 @@ module: mass_microsites: 0 mass_more_lists: 0 mass_nav: 0 - mass_redirect_normalizer: 0 mass_redirects: 0 mass_scheduled_transitions: 0 mass_schema_apply_action: 0 @@ -212,6 +211,7 @@ module: rabbit_hole: 0 redirect: 0 redirect_404: 0 + redirect_audit: 0 require_on_publish: 0 responsive_image: 0 rest: 0 diff --git a/conf/drupal/config/mass_utility.settings.yml b/conf/drupal/config/mass_utility.settings.yml index 0f60a85d54..a7d2900578 100644 --- a/conf/drupal/config/mass_utility.settings.yml +++ b/conf/drupal/config/mass_utility.settings.yml @@ -1,8 +1,8 @@ allowed_urls: "https://www.youtube.com/\r\nhttps://docs.digital.mass.gov\r\nhttps://public.dep.state.ma.us/\r\nhttps://calendar.google.com/\r\nhttps://dashboards.digital.mass.gov/\r\nhttps://docs.google.com/\r\nhttps://drive.google.com/\r\nhttps://fusiontables.googleusercontent.com/\r\nhttps://libraryh3lp.com/\r\nhttps://mass-eoeea.maps.arcgis.com/\r\nhttps://massgov.formstack.com/forms/sample\r\nhttps://massgov.github.io\r\nhttps://public.tableau.com/\r\nhttps://www.google.com/\r\nhttps://www.massdot.state.ma.us/\r\nhttps://www.massmarinefisheries.net/\r\nhttps://www.youtube.com/\r\nhttps://youtu.be/\r\nhttps://memamaps.maps.arcgis.com/\r\nhttps://maps.google.com/\r\nhttps://licensing.reg.state.ma.us/\r\nhttps://hwy.massdot.state.ma.us/\r\nhttps://dphanalytics.hhs.mass.gov/\r\nhttps://code.highcharts.com/\r\nhttps://eoeea.maps.arcgis.com/\r\nhttps://eeaonline.eea.state.ma.us/\r\nhttps://gis.massdot.state.ma.us/\r\nhttps://dotfeeds.state.ma.us/\r\nhttps://massgis.maps.arcgis.com/\r\nhttps://recolle
ct.net/\r\nhttp://massdot.maps.arcgis.com/\r\nhttps://massdot.maps.arcgis.com/\r\nhttps://calculator.digital.mass.gov/\r\nhttps://api.recollect.net/\r\nhttps://www.eia.gov/beta/states/iframe\r\nhttps://mdphgis.maps.arcgis.com/\r\nhttps://app.powerbigov.us/\r\nhttps://calc.a4we.org/\r\nhttps://w.soundcloud.com/\r\nhttps://www.google.com/maps\r\nhttps://nedews.nrcc.cornell.edu/\r\nhttps://flo.uri.sh/\r\nhttps://app.smartsheet.com/\r\nhttps://experience.arcgis.com/\r\nhttps://hedfuel.azurewebsites.net/\r\nhttps://dhcd-production-public.s3.amazonaws.com/\r\nhttps://cloud.samsara.com/o/8600/fleet/viewer/\r\nhttps://hwywebqa.massdot.state.ma.us\r\nhttps://player.vimeo.com/video/\r\nhttps://massgov.formstack.com/forms/" forms_allowed_hostnames: - - '/^mass-forms\.ddev\.site$/' - - '/^forms\.mass\.local$/' - - '/^forms\.mass\.gov$/' + - /^mass-forms\.ddev\.site$/ + - /^forms\.mass\.local$/ + - /^forms\.mass\.gov$/ - '/^[a-zA-Z0-9\-]+-mass-forms\.pantheonsite\.io$/' - '/^[a-zA-Z0-9\-]+\.forms\.mass\.gov$/' header_mixed_urls: "\r\n" diff --git a/conf/drupal/config/redirect_audit.settings.yml b/conf/drupal/config/redirect_audit.settings.yml new file mode 100644 index 0000000000..6a5fc51f32 --- /dev/null +++ b/conf/drupal/config/redirect_audit.settings.yml @@ -0,0 +1,7 @@ +_core: + default_config_hash: hz8P2E_PUpHAuiZpVNwTrG3074UQ_2Q8SuYZpIp_v6U +autofix_enabled: false +scan_on_change: true +batch_size: 50 +max_chain_depth: 10 +items_per_page: 20