From 15273ba1649c0ccbc51e30a0db526374049a088f Mon Sep 17 00:00:00 2001
From: Emma <emma.lowe@digital.cabinet-office.gov.uk>
Date: Wed, 22 Apr 2026 14:44:07 +0100
Subject: [PATCH 1/7] Update duplicates rake tasks to use govuk index

The govuk index will be the only relevant index to run
these rake tasks on, once the government index has been
retired (this is in progress).

The duplicates are located by content_id, which only appears
as a field in the content indexes and not the auxiliary
indexes.
---
 lib/tasks/duplicates.rake          | 8 ++++----
 spec/unit/tasks/duplicates_spec.rb | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/tasks/duplicates.rake b/lib/tasks/duplicates.rake
index a85f6d6aa..ab74a9f91 100644
--- a/lib/tasks/duplicates.rake
+++ b/lib/tasks/duplicates.rake
@@ -2,8 +2,8 @@ require "rummager"
 
 namespace :duplicates do
   desc "Find all documents with the same content_id"
-  task :find, [:index] do |_t, args|
-    index = args[:index] || "government"
+  task :find do
+    index = SearchConfig.govuk_index_name
 
     duplicates = Search::DuplicateFinder.new(index:).find_duplicates
 
@@ -16,8 +16,8 @@ namespace :duplicates do
   end
 
   desc "Find all documents with the same content_id and remove them"
-  task :remove, [:index] do |_t, args|
-    index = args[:index] || "government"
+  task :remove do
+    index = SearchConfig.govuk_index_name
 
     duplicates = Search::DuplicateFinder.new(index:).find_duplicates
     puts "No duplicates found" if duplicates.empty?
diff --git a/spec/unit/tasks/duplicates_spec.rb b/spec/unit/tasks/duplicates_spec.rb
index 708715c41..4945e0338 100644
--- a/spec/unit/tasks/duplicates_spec.rb
+++ b/spec/unit/tasks/duplicates_spec.rb
@@ -20,7 +20,7 @@
       },
     ]
   end
-  let(:index) { "govuk" }
+  let(:index) { "govuk_test" }
 
   before do
     Rake::Task[task_name].reenable

From 193520cd1a72baa85cad5175bfeae692e9db49ef Mon Sep 17 00:00:00 2001
From: Emma <emma.lowe@digital.cabinet-office.gov.uk>
Date: Tue, 28 Apr 2026 17:04:55 +0100
Subject: [PATCH 2/7] Refactor duplicates classes to always use govuk index

This code can be simplified now that we're only using
the duplicates rake tasks on the govuk index.
---
 lib/search/duplicate_finder.rb                    |  8 +-------
 lib/search/duplicate_remover.rb                   |  7 +++----
 lib/tasks/duplicates.rake                         | 10 +++-------
 spec/integration/search/duplicate_finder_spec.rb  |  6 +++---
 spec/integration/search/duplicate_remover_spec.rb |  6 +++---
 spec/unit/tasks/duplicates_spec.rb                | 13 +++++--------
 6 files changed, 18 insertions(+), 32 deletions(-)

diff --git a/lib/search/duplicate_finder.rb b/lib/search/duplicate_finder.rb
index 79619f9d8..504d19be5 100644
--- a/lib/search/duplicate_finder.rb
+++ b/lib/search/duplicate_finder.rb
@@ -2,15 +2,9 @@ module Search
   class DuplicateFinder
     TIMEOUT = 60
 
-    attr_reader :index
-
-    def initialize(index:)
-      @index = index
-    end
-
     def find_duplicates
       response = Services.elasticsearch(timeout: TIMEOUT).search(
-        index: index,
+        index: SearchConfig.govuk_index_name,
         size: 0,
         body: {
           aggs: {
diff --git a/lib/search/duplicate_remover.rb b/lib/search/duplicate_remover.rb
index c9396ab7e..54d11c55c 100644
--- a/lib/search/duplicate_remover.rb
+++ b/lib/search/duplicate_remover.rb
@@ -1,9 +1,8 @@
 module Search
   class DuplicateRemover
-    attr_reader :index, :logger
+    attr_reader :logger
 
-    def initialize(index:, logger: Logger.new($stdout))
-      @index = index
+    def initialize(logger: Logger.new($stdout))
       @logger = logger
     end
 
@@ -39,7 +38,7 @@ def sort_by_updated_at(documents)
 
     def delete_document(link)
       Services.elasticsearch.delete_by_query(
-        index: index,
+        index: SearchConfig.govuk_index_name,
         body: { query: { term: { link: link } } },
       )
       logger.info "Deleted duplicate document: #{link}"
diff --git a/lib/tasks/duplicates.rake b/lib/tasks/duplicates.rake
index ab74a9f91..c05f73e05 100644
--- a/lib/tasks/duplicates.rake
+++ b/lib/tasks/duplicates.rake
@@ -3,9 +3,7 @@ require "rummager"
 namespace :duplicates do
   desc "Find all documents with the same content_id"
   task :find do
-    index = SearchConfig.govuk_index_name
-
-    duplicates = Search::DuplicateFinder.new(index:).find_duplicates
+    duplicates = Search::DuplicateFinder.new.find_duplicates
 
     duplicates.each do |duplicate|
       puts "Content_id: #{duplicate[:content_id]}"
@@ -17,11 +15,9 @@ namespace :duplicates do
 
   desc "Find all documents with the same content_id and remove them"
   task :remove do
-    index = SearchConfig.govuk_index_name
-
-    duplicates = Search::DuplicateFinder.new(index:).find_duplicates
+    duplicates = Search::DuplicateFinder.new.find_duplicates
     puts "No duplicates found" if duplicates.empty?
 
-    Search::DuplicateRemover.new(index:).remove_duplicates(duplicates: duplicates)
+    Search::DuplicateRemover.new.remove_duplicates(duplicates: duplicates)
   end
 end
diff --git a/spec/integration/search/duplicate_finder_spec.rb b/spec/integration/search/duplicate_finder_spec.rb
index 692bc2c37..fa5a4bcaa 100644
--- a/spec/integration/search/duplicate_finder_spec.rb
+++ b/spec/integration/search/duplicate_finder_spec.rb
@@ -4,7 +4,7 @@
   let(:index) { "govuk_test" }
   describe "there are no documents in Elasticsearch" do
     it "returns an empty array" do
-      expect(Search::DuplicateFinder.new(index:).find_duplicates).to eq([])
+      expect(Search::DuplicateFinder.new.find_duplicates).to eq([])
     end
   end
   describe "there are documents in Elasticsearch, none have a duplicate content_id" do
@@ -12,7 +12,7 @@
       (1..10).each do |n|
         commit_document(index, { link: "link/path#{n}", content_id: SecureRandom.uuid })
       end
-      expect(Search::DuplicateFinder.new(index:).find_duplicates).to be_empty
+      expect(Search::DuplicateFinder.new.find_duplicates).to be_empty
     end
   end
   describe "there are documents in Elasticsearch, some have a duplicate content_id" do
@@ -27,7 +27,7 @@
       commit_document(index, { link: "link/path_c", content_id: "other", title: "title_c", updated_at: date_2 })
       commit_document(index, { link: "link/path_d", content_id: "other", title: "title_d" })
 
-      result = Search::DuplicateFinder.new(index:).find_duplicates
+      result = Search::DuplicateFinder.new.find_duplicates
 
       expect(result).to match_array([
         a_hash_including(
diff --git a/spec/integration/search/duplicate_remover_spec.rb b/spec/integration/search/duplicate_remover_spec.rb
index e26e2d6cb..44671d20f 100644
--- a/spec/integration/search/duplicate_remover_spec.rb
+++ b/spec/integration/search/duplicate_remover_spec.rb
@@ -1,11 +1,11 @@
 require "spec_helper"
 
 RSpec.describe Search::DuplicateRemover do
-  let(:index) { "government_test" }
+  let(:index) { "govuk_test" }
   let(:io) { StringIO.new }
   let(:logger) { Logger.new(io) }
-  let(:duplicates) { Search::DuplicateFinder.new(index:).find_duplicates }
-  subject(:remover) { described_class.new(index:, logger:) }
+  let(:duplicates) { Search::DuplicateFinder.new.find_duplicates }
+  subject(:remover) { described_class.new(logger:) }
 
   context "A set of duplicate documents has no updated_at field" do
     before :each do
diff --git a/spec/unit/tasks/duplicates_spec.rb b/spec/unit/tasks/duplicates_spec.rb
index 4945e0338..4ef4165b1 100644
--- a/spec/unit/tasks/duplicates_spec.rb
+++ b/spec/unit/tasks/duplicates_spec.rb
@@ -20,21 +20,19 @@
       },
     ]
   end
-  let(:index) { "govuk_test" }
 
   before do
     Rake::Task[task_name].reenable
     allow(Search::DuplicateFinder)
       .to receive(:new)
-            .with(index:)
-            .and_return(double(find_duplicates: fake_duplicates))
+      .and_return(double(find_duplicates: fake_duplicates))
   end
 
   describe "duplicates:find" do
     let(:task_name) { "duplicates:find" }
 
     it "prints duplicate sets in the expected format" do
-      output = capture_stdout { Rake::Task[task_name].invoke(index) }
+      output = capture_stdout { Rake::Task[task_name].invoke }
 
       expect(output).to include("Content_id: aaa-111")
       expect(output).to include("  T1 /a1 2020-01-01")
@@ -51,19 +49,18 @@
     before do
       allow(Search::DuplicateRemover)
         .to receive(:new)
-              .with(index:)
-              .and_return(duplicate_remover)
+        .and_return(duplicate_remover)
     end
     describe "there are duplicates" do
       it "removes duplicates" do
-        Rake::Task[task_name].invoke(index)
+        Rake::Task[task_name].invoke
         expect(duplicate_remover).to have_received(:remove_duplicates).with(duplicates: fake_duplicates).once
       end
     end
     describe "there are no duplicates" do
       let(:fake_duplicates) { [] }
       it "does not remove duplicates" do
-        output = capture_stdout { Rake::Task[task_name].invoke(index) }
+        output = capture_stdout { Rake::Task[task_name].invoke }
         expect(output).to eq("No duplicates found\n")
       end
     end

From 807786bb5b143b199683a0212a80056309bae06c Mon Sep 17 00:00:00 2001
From: Emma <emma.lowe@digital.cabinet-office.gov.uk>
Date: Wed, 22 Apr 2026 17:08:33 +0100
Subject: [PATCH 3/7] Remove export:search rake task

This has been broken since 2020, so it seems unlikely that
it will be missed. We can get the results in JSON format
from the Search API anyway.

See PR that broke the task:
https://github.com/alphagov/search-api/pull/2062
---
 lib/tasks/export.rake | 38 --------------------------------------
 1 file changed, 38 deletions(-)
 delete mode 100644 lib/tasks/export.rake

diff --git a/lib/tasks/export.rake b/lib/tasks/export.rake
deleted file mode 100644
index e60b6f041..000000000
--- a/lib/tasks/export.rake
+++ /dev/null
@@ -1,38 +0,0 @@
-require "csv"
-require "rack"
-require "rummager"
-
-namespace :export do
-  desc "Get all results which match the given search.  Set FIELDS to control the exported fields."
-  task :search, [:query_string] do |_, args|
-    params = Rack::Utils.parse_nested_query(args.query_string)
-               .merge("fields" => "content_id,#{ENV.fetch('FIELDS', '')}")
-               .transform_values { |v| [v] }
-    search_params = SearchConfig.parse_parameters(params)
-    query = search_params.search_config.generate_query_for_params(search_params)
-    query[:sort] = %i[document_type _uid]
-    fields = search_params.return_fields.uniq
-    base_uri = search_params.search_config.base_uri
-
-    CSV.open("export-search.csv", "wb", headers: fields, write_headers: true, force_quotes: true) do |csv|
-      ScrollEnumerator.new(
-        client: Services.elasticsearch(hosts: base_uri),
-        index_names: SearchConfig.content_index_names + [SearchConfig.govuk_index_name],
-        search_body: query,
-      ) do |hit|
-        csv << fields.map do |f|
-          value = hit["_source"][f]
-
-          case value
-          when Hash
-            value.fetch("slug", value)
-          when Array
-            value.join(",")
-          else
-            value
-          end
-        end
-      end
-    end
-  end
-end

From 7efc2ab40781489f34f171a280cb01ced330f0cc Mon Sep 17 00:00:00 2001
From: Emma <emma.lowe@digital.cabinet-office.gov.uk>
Date: Fri, 24 Apr 2026 11:00:40 +0100
Subject: [PATCH 4/7] Update delete:by_format rake task to use govuk index

The govuk index will be the only relevant index to run
this rake task on, once the government index has been
retired (this is in progress). That's because the format
field only appears in the content indexes, not the auxiliary
indexes.
---
 lib/tasks/delete.rake                 |  9 ++++-----
 spec/integration/tasks/delete_spec.rb | 16 ++++------------
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/lib/tasks/delete.rake b/lib/tasks/delete.rake
index b3d3d4f07..1ab984c2d 100644
--- a/lib/tasks/delete.rake
+++ b/lib/tasks/delete.rake
@@ -14,16 +14,15 @@ namespace :delete do
   end
 
   desc "
-  Delete all documents by format from an index.
+  Delete all documents by format from the govuk index.
   Usage
-  rake 'delete:by_format[format_name, elasticsearch_index]'
+  rake 'delete:by_format[format_name]'
   "
-  task :by_format, [:format, :index_name] do |_, args|
+  task :by_format, [:format] do |_, args|
     format = args[:format]
-    index  = args[:index_name]
+    index = SearchConfig.govuk_index_name
 
     abort "Specify format for deletion" if format.nil?
-    abort "Specify an index" if index.nil?
 
     warn_for_single_cluster_run
     client = Services.elasticsearch(cluster: Clusters.default_cluster, timeout: 5.0)
diff --git a/spec/integration/tasks/delete_spec.rb b/spec/integration/tasks/delete_spec.rb
index 48aa9fcce..ed6d6ba3f 100644
--- a/spec/integration/tasks/delete_spec.rb
+++ b/spec/integration/tasks/delete_spec.rb
@@ -54,28 +54,20 @@
   describe "delete:by_format" do
     let(:task_name) { "delete:by_format" }
     let(:task) { Rake::Task[task_name] }
-    let(:index) { SearchConfig.all_index_names.first }
+    let(:index) { SearchConfig.govuk_index_name }
     let(:format) { "answer" }
 
     context "when format is missing" do
       it "prints a warning" do
         expect {
-          task.invoke(nil, index)
+          task.invoke(nil)
         }.to output("Specify format for deletion\n").to_stderr.and raise_error(SystemExit)
       end
     end
 
-    context "when index_name is missing" do
-      it "prints a warning" do
-        expect {
-          task.invoke(format, nil)
-        }.to output("Specify an index\n").to_stderr.and raise_error(SystemExit)
-      end
-    end
-
     context "when there are no documents for the format" do
       it "prints no documents to delete" do
-        output = capture_stdout { task.invoke(format, index) }
+        output = capture_stdout { task.invoke(format) }
         expect(output).to match(/No #{format} documents to delete/)
       end
     end
@@ -87,7 +79,7 @@
 
       it "deletes all documents in batches" do
         output = capture_stdout do
-          expect { task.invoke(format, index) }.to change {
+          expect { task.invoke(format) }.to change {
             client.count(index:, body: { query: { term: { format: format } } })["count"]
           }.from(3).to(0)
         end

From 915bd4b5fc54e0a1f2c873b4dbeac491aa82c1ad Mon Sep 17 00:00:00 2001
From: Emma <emma.lowe@digital.cabinet-office.gov.uk>
Date: Fri, 24 Apr 2026 11:11:15 +0100
Subject: [PATCH 5/7] Update debug:show_new_synonyms to use govuk index

The govuk index will be the only relevant index to run
this rake task on, once the government index has been
retired (this is in progress). That's because the title and
description fields only appear in the content indexes,
not the auxiliary indexes.
---
 lib/tasks/debug.rake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/tasks/debug.rake b/lib/tasks/debug.rake
index ea0398ea9..df33154a7 100644
--- a/lib/tasks/debug.rake
+++ b/lib/tasks/debug.rake
@@ -28,8 +28,8 @@ namespace :debug do
   end
 
   desc "New synonyms test"
-  task :show_new_synonyms, [:query, :index_name] do |_, args|
-    index = args.index_name || SearchConfig.govuk_index_name
+  task :show_new_synonyms, [:query] do |_, args|
+    index = SearchConfig.govuk_index_name
     model = Debug::Synonyms::Analyzer.new(index: index)
 
     search_tokens = model.analyze_query(args.query)

From 721cda04d10f38f63a7b70dfc85b932ec937595a Mon Sep 17 00:00:00 2001
From: Emma <emma.lowe@digital.cabinet-office.gov.uk>
Date: Fri, 24 Apr 2026 11:44:59 +0100
Subject: [PATCH 6/7] Update search:update_supertypes to use govuk index

The govuk index will be the only relevant index to run
this rake task on, once the government index has been
retired (this is in progress). That's because the
content_store_document_type field only appears in the content
indexes, not the auxiliary indexes.
---
 lib/tasks/indices.rake          |  4 +---
 spec/unit/tasks/indices_spec.rb | 10 ++++------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/lib/tasks/indices.rake b/lib/tasks/indices.rake
index 033ad4125..c61dba134 100644
--- a/lib/tasks/indices.rake
+++ b/lib/tasks/indices.rake
@@ -88,9 +88,7 @@ govuk_document_types gem using sidekiq jobs.
 This does not update the schema.
 "
   task :update_supertypes do
-    index_names.each do |index_name|
-      GovukIndex::Updater.update(index_name, GovukIndex::SupertypeJob)
-    end
+    GovukIndex::Updater.update(SearchConfig.govuk_index_name, GovukIndex::SupertypeJob)
   end
 
   desc "Migrate the data to a new schema definition
diff --git a/spec/unit/tasks/indices_spec.rb b/spec/unit/tasks/indices_spec.rb
index 51ef03ac4..a8f4467b0 100644
--- a/spec/unit/tasks/indices_spec.rb
+++ b/spec/unit/tasks/indices_spec.rb
@@ -162,16 +162,14 @@
   describe "search:update_supertypes" do
     let(:task_name) { "search:update_supertypes" }
 
-    it "updates supertypes for all indices" do
+    it "updates supertypes for govuk index" do
       allow(GovukIndex::Updater).to receive(:update)
 
       Rake::Task[task_name].invoke
 
-      index_names.each do |index_name|
-        expect(GovukIndex::Updater)
-          .to have_received(:update)
-          .with(index_name, GovukIndex::SupertypeJob)
-      end
+      expect(GovukIndex::Updater)
+        .to have_received(:update)
+        .with(govuk_index_name, GovukIndex::SupertypeJob)
     end
   end
 

From 545abd623fbda90a9de37fc349db19be9a3a404c Mon Sep 17 00:00:00 2001
From: Emma <emma.lowe@digital.cabinet-office.gov.uk>
Date: Fri, 24 Apr 2026 12:35:07 +0100
Subject: [PATCH 7/7] Update rank_eval to use govuk index

All documents have now been migrated to the govuk index,
so we can remove references to other indices.
---
 lib/debug/rank_eval.rb                 | 16 +++-------------
 spec/support/rank_eval_test_helpers.rb |  3 +--
 2 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/lib/debug/rank_eval.rb b/lib/debug/rank_eval.rb
index 5a23035de..4cadd7287 100644
--- a/lib/debug/rank_eval.rb
+++ b/lib/debug/rank_eval.rb
@@ -41,7 +41,7 @@ def evaluate
           },
           ratings: data[:judgements].map do |judgement|
             {
-              _index: index_for_link(judgement[:link]),
+              _index: govuk_index_name,
               _id: judgement[:link],
               rating: judgement[:score],
             }
@@ -92,8 +92,8 @@ def rank_eval(requests)
         headers: { "Content-Type" => "application/json" },
         timeout: 120,
       }
-      indices = "*"
-      url = "#{uri}/#{indices}/_rank_eval"
+      index = SearchConfig.govuk_index_name
+      url = "#{uri}/#{index}/_rank_eval"
       response = HTTParty.post(url, options)
       puts "Elasticsearch: #{response.code}: #{response.message}"
       JSON.parse(response.body).with_indifferent_access
@@ -111,16 +111,6 @@ def ignore_extra_judgements(data)
       end
     end
 
-    def index_for_link(link)
-      return government_index_name if link.start_with? "/government/"
-
-      govuk_index_name
-    end
-
-    def government_index_name
-      @government_index_name ||= @search_config.get_index_for_alias(SearchConfig.content_index_names)
-    end
-
     def govuk_index_name
       @govuk_index_name ||= @search_config.get_index_for_alias(SearchConfig.govuk_index_name)
     end
diff --git a/spec/support/rank_eval_test_helpers.rb b/spec/support/rank_eval_test_helpers.rb
index 783682ab5..f83d46c33 100644
--- a/spec/support/rank_eval_test_helpers.rb
+++ b/spec/support/rank_eval_test_helpers.rb
@@ -6,7 +6,6 @@ def mock_judgement_csv
     CSV.generate do |csv|
       csv << %w[query rating link score]
       csv << ["harry potter", "relevant", "/harry-potter", 3]
-      # use /government to test fetching alias for government index
       csv << ["passport", "relevant", "/government/renew-a-passport", 3]
       # add repeated row to test ignore_extra_judgements
       csv << ["passport", "near", "/government/renew-a-passport", 2]
@@ -24,7 +23,7 @@ def rank_eval_expected_output
 
   def stub_rank_eval_request
     es_source = ENV["ELASTICSEARCH_URI"] || "http://localhost:9200"
-    stub_request(:post, "#{es_source}/*/_rank_eval")
+    stub_request(:post, "#{es_source}/govuk_test/_rank_eval")
       .to_return(
         status: 200,
         body: {