diff --git a/lib/debug/rank_eval.rb b/lib/debug/rank_eval.rb index 5a23035de..4cadd7287 100644 --- a/lib/debug/rank_eval.rb +++ b/lib/debug/rank_eval.rb @@ -41,7 +41,7 @@ def evaluate }, ratings: data[:judgements].map do |judgement| { - _index: index_for_link(judgement[:link]), + _index: govuk_index_name, _id: judgement[:link], rating: judgement[:score], } @@ -92,8 +92,8 @@ def rank_eval(requests) headers: { "Content-Type" => "application/json" }, timeout: 120, } - indices = "*" - url = "#{uri}/#{indices}/_rank_eval" + index = SearchConfig.govuk_index_name + url = "#{uri}/#{index}/_rank_eval" response = HTTParty.post(url, options) puts "Elasticsearch: #{response.code}: #{response.message}" JSON.parse(response.body).with_indifferent_access @@ -111,16 +111,6 @@ def ignore_extra_judgements(data) end end - def index_for_link(link) - return government_index_name if link.start_with? "/government/" - - govuk_index_name - end - - def government_index_name - @government_index_name ||= @search_config.get_index_for_alias(SearchConfig.content_index_names) - end - def govuk_index_name @govuk_index_name ||= @search_config.get_index_for_alias(SearchConfig.govuk_index_name) end diff --git a/lib/search/duplicate_finder.rb b/lib/search/duplicate_finder.rb index 79619f9d8..504d19be5 100644 --- a/lib/search/duplicate_finder.rb +++ b/lib/search/duplicate_finder.rb @@ -2,15 +2,9 @@ module Search class DuplicateFinder TIMEOUT = 60 - attr_reader :index - - def initialize(index:) - @index = index - end - def find_duplicates response = Services.elasticsearch(timeout: TIMEOUT).search( - index: index, + index: SearchConfig.govuk_index_name, size: 0, body: { aggs: { diff --git a/lib/search/duplicate_remover.rb b/lib/search/duplicate_remover.rb index c9396ab7e..54d11c55c 100644 --- a/lib/search/duplicate_remover.rb +++ b/lib/search/duplicate_remover.rb @@ -1,9 +1,8 @@ module Search class DuplicateRemover - attr_reader :index, :logger + attr_reader :logger - def initialize(index:, logger: Logger.new($stdout)) - @index = index + def initialize(logger: Logger.new($stdout)) @logger = logger end @@ -39,7 +38,7 @@ def sort_by_updated_at(documents) def delete_document(link) Services.elasticsearch.delete_by_query( - index: index, + index: SearchConfig.govuk_index_name, body: { query: { term: { link: link } } }, ) logger.info "Deleted duplicate document: #{link}" diff --git a/lib/tasks/debug.rake b/lib/tasks/debug.rake index ea0398ea9..df33154a7 100644 --- a/lib/tasks/debug.rake +++ b/lib/tasks/debug.rake @@ -28,8 +28,8 @@ namespace :debug do end desc "New synonyms test" - task :show_new_synonyms, [:query, :index_name] do |_, args| - index = args.index_name || SearchConfig.govuk_index_name + task :show_new_synonyms, [:query] do |_, args| + index = SearchConfig.govuk_index_name model = Debug::Synonyms::Analyzer.new(index: index) search_tokens = model.analyze_query(args.query) diff --git a/lib/tasks/delete.rake b/lib/tasks/delete.rake index b3d3d4f07..1ab984c2d 100644 --- a/lib/tasks/delete.rake +++ b/lib/tasks/delete.rake @@ -14,16 +14,15 @@ namespace :delete do end desc " - Delete all documents by format from an index. + Delete all documents by format from the govuk index. Usage - rake 'delete:by_format[format_name, elasticsearch_index]' + rake 'delete:by_format[format_name]' " - task :by_format, [:format, :index_name] do |_, args| + task :by_format, [:format] do |_, args| format = args[:format] - index = args[:index_name] + index = SearchConfig.govuk_index_name abort "Specify format for deletion" if format.nil? - abort "Specify an index" if index.nil? warn_for_single_cluster_run client = Services.elasticsearch(cluster: Clusters.default_cluster, timeout: 5.0) diff --git a/lib/tasks/duplicates.rake b/lib/tasks/duplicates.rake index a85f6d6aa..c05f73e05 100644 --- a/lib/tasks/duplicates.rake +++ b/lib/tasks/duplicates.rake @@ -2,10 +2,8 @@ require "rummager" namespace :duplicates do desc "Find all documents with the same content_id" - task :find, [:index] do |_t, args| - index = args[:index] || "government" - - duplicates = Search::DuplicateFinder.new(index:).find_duplicates + task :find do + duplicates = Search::DuplicateFinder.new.find_duplicates duplicates.each do |duplicate| puts "Content_id: #{duplicate[:content_id]}" @@ -16,12 +14,10 @@ namespace :duplicates do end desc "Find all documents with the same content_id and remove them" - task :remove, [:index] do |_t, args| - index = args[:index] || "government" - - duplicates = Search::DuplicateFinder.new(index:).find_duplicates + task :remove do + duplicates = Search::DuplicateFinder.new.find_duplicates puts "No duplicates found" if duplicates.empty? - Search::DuplicateRemover.new(index:).remove_duplicates(duplicates: duplicates) + Search::DuplicateRemover.new.remove_duplicates(duplicates: duplicates) end end diff --git a/lib/tasks/export.rake b/lib/tasks/export.rake deleted file mode 100644 index e60b6f041..000000000 --- a/lib/tasks/export.rake +++ /dev/null @@ -1,38 +0,0 @@ -require "csv" -require "rack" -require "rummager" - -namespace :export do - desc "Get all results which match the given search. Set FIELDS to control the exported fields." - task :search, [:query_string] do |_, args| - params = Rack::Utils.parse_nested_query(args.query_string) - .merge("fields" => "content_id,#{ENV.fetch('FIELDS', '')}") - .transform_values { |v| [v] } - search_params = SearchConfig.parse_parameters(params) - query = search_params.search_config.generate_query_for_params(search_params) - query[:sort] = %i[document_type _uid] - fields = search_params.return_fields.uniq - base_uri = search_params.search_config.base_uri - - CSV.open("export-search.csv", "wb", headers: fields, write_headers: true, force_quotes: true) do |csv| - ScrollEnumerator.new( - client: Services.elasticsearch(hosts: base_uri), - index_names: SearchConfig.content_index_names + [SearchConfig.govuk_index_name], - search_body: query, - ) do |hit| - csv << fields.map do |f| - value = hit["_source"][f] - - case value - when Hash - value.fetch("slug", value) - when Array - value.join(",") - else - value - end - end - end - end - end -end diff --git a/lib/tasks/indices.rake b/lib/tasks/indices.rake index 033ad4125..c61dba134 100644 --- a/lib/tasks/indices.rake +++ b/lib/tasks/indices.rake @@ -88,9 +88,7 @@ govuk_document_types gem using sidekiq jobs. This does not update the schema. " task :update_supertypes do - index_names.each do |index_name| - GovukIndex::Updater.update(index_name, GovukIndex::SupertypeJob) - end + GovukIndex::Updater.update(SearchConfig.govuk_index_name, GovukIndex::SupertypeJob) end desc "Migrate the data to a new schema definition diff --git a/spec/integration/search/duplicate_finder_spec.rb b/spec/integration/search/duplicate_finder_spec.rb index 692bc2c37..fa5a4bcaa 100644 --- a/spec/integration/search/duplicate_finder_spec.rb +++ b/spec/integration/search/duplicate_finder_spec.rb @@ -4,7 +4,7 @@ let(:index) { "govuk_test" } describe "there are no documents in Elasticsearch" do it "returns an empty array" do - expect(Search::DuplicateFinder.new(index:).find_duplicates).to eq([]) + expect(Search::DuplicateFinder.new.find_duplicates).to eq([]) end end describe "there are documents in Elasticsearch, none have a duplicate content_id" do @@ -12,7 +12,7 @@ (1..10).each do |n| commit_document(index, { link: "link/path#{n}", content_id: SecureRandom.uuid }) end - expect(Search::DuplicateFinder.new(index:).find_duplicates).to be_empty + expect(Search::DuplicateFinder.new.find_duplicates).to be_empty end end describe "there are documents in Elasticsearch, some have a duplicate content_id" do @@ -27,7 +27,7 @@ commit_document(index, { link: "link/path_c", content_id: "other", title: "title_c", updated_at: date_2 }) commit_document(index, { link: "link/path_d", content_id: "other", title: "title_d" }) - result = Search::DuplicateFinder.new(index:).find_duplicates + result = Search::DuplicateFinder.new.find_duplicates expect(result).to match_array([ a_hash_including( diff --git a/spec/integration/search/duplicate_remover_spec.rb b/spec/integration/search/duplicate_remover_spec.rb index e26e2d6cb..44671d20f 100644 --- a/spec/integration/search/duplicate_remover_spec.rb +++ b/spec/integration/search/duplicate_remover_spec.rb @@ -1,11 +1,11 @@ require "spec_helper" RSpec.describe Search::DuplicateRemover do - let(:index) { "government_test" } + let(:index) { "govuk_test" } let(:io) { StringIO.new } let(:logger) { Logger.new(io) } - let(:duplicates) { Search::DuplicateFinder.new(index:).find_duplicates } - subject(:remover) { described_class.new(index:, logger:) } + let(:duplicates) { Search::DuplicateFinder.new.find_duplicates } + subject(:remover) { described_class.new(logger:) } context "A set of duplicate documents has no updated_at field" do before :each do diff --git a/spec/integration/tasks/delete_spec.rb b/spec/integration/tasks/delete_spec.rb index 48aa9fcce..ed6d6ba3f 100644 --- a/spec/integration/tasks/delete_spec.rb +++ b/spec/integration/tasks/delete_spec.rb @@ -54,28 +54,20 @@ describe "delete:by_format" do let(:task_name) { "delete:by_format" } let(:task) { Rake::Task[task_name] } - let(:index) { SearchConfig.all_index_names.first } + let(:index) { SearchConfig.govuk_index_name } let(:format) { "answer" } context "when format is missing" do it "prints a warning" do expect { - task.invoke(nil, index) + task.invoke(nil) }.to output("Specify format for deletion\n").to_stderr.and raise_error(SystemExit) end end - context "when index_name is missing" do - it "prints a warning" do - expect { - task.invoke(format, nil) - }.to output("Specify an index\n").to_stderr.and raise_error(SystemExit) - end - end - context "when there are no documents for the format" do it "prints no documents to delete" do - output = capture_stdout { task.invoke(format, index) } + output = capture_stdout { task.invoke(format) } expect(output).to match(/No #{format} documents to delete/) end end @@ -87,7 +79,7 @@ it "deletes all documents in batches" do output = capture_stdout do - expect { task.invoke(format, index) }.to change { + expect { task.invoke(format) }.to change { client.count(index:, body: { query: { term: { format: format } } })["count"] }.from(3).to(0) end diff --git a/spec/support/rank_eval_test_helpers.rb b/spec/support/rank_eval_test_helpers.rb index 783682ab5..f83d46c33 100644 --- a/spec/support/rank_eval_test_helpers.rb +++ b/spec/support/rank_eval_test_helpers.rb @@ -6,7 +6,6 @@ def mock_judgement_csv CSV.generate do |csv| csv << %w[query rating link score] csv << ["harry potter", "relevant", "/harry-potter", 3] - # use /government to test fetching alias for government index csv << ["passport", "relevant", "/government/renew-a-passport", 3] # add repeated row to test ignore_extra_judgements csv << ["passport", "near", "/government/renew-a-passport", 2] @@ -24,7 +23,7 @@ def rank_eval_expected_output def stub_rank_eval_request es_source = ENV["ELASTICSEARCH_URI"] || "http://localhost:9200" - stub_request(:post, "#{es_source}/*/_rank_eval") + stub_request(:post, "#{es_source}/govuk_test/_rank_eval") .to_return( status: 200, body: { diff --git a/spec/unit/tasks/duplicates_spec.rb b/spec/unit/tasks/duplicates_spec.rb index 708715c41..4ef4165b1 100644 --- a/spec/unit/tasks/duplicates_spec.rb +++ b/spec/unit/tasks/duplicates_spec.rb @@ -20,21 +20,19 @@ }, ] end - let(:index) { "govuk" } before do Rake::Task[task_name].reenable allow(Search::DuplicateFinder) .to receive(:new) - .with(index:) - .and_return(double(find_duplicates: fake_duplicates)) + .and_return(double(find_duplicates: fake_duplicates)) end describe "duplicates:find" do let(:task_name) { "duplicates:find" } it "prints duplicate sets in the expected format" do - output = capture_stdout { Rake::Task[task_name].invoke(index) } + output = capture_stdout { Rake::Task[task_name].invoke } expect(output).to include("Content_id: aaa-111") expect(output).to include(" T1 /a1 2020-01-01") @@ -51,19 +49,18 @@ before do allow(Search::DuplicateRemover) .to receive(:new) - .with(index:) - .and_return(duplicate_remover) + .and_return(duplicate_remover) end describe "there are duplicates" do it "removes duplicates" do - Rake::Task[task_name].invoke(index) + Rake::Task[task_name].invoke expect(duplicate_remover).to have_received(:remove_duplicates).with(duplicates: fake_duplicates).once end end describe "there are no duplicates" do let(:fake_duplicates) { [] } it "does not remove duplicates" do - output = capture_stdout { Rake::Task[task_name].invoke(index) } + output = capture_stdout { Rake::Task[task_name].invoke } expect(output).to eq("No duplicates found\n") end end diff --git a/spec/unit/tasks/indices_spec.rb b/spec/unit/tasks/indices_spec.rb index 51ef03ac4..a8f4467b0 100644 --- a/spec/unit/tasks/indices_spec.rb +++ b/spec/unit/tasks/indices_spec.rb @@ -162,16 +162,14 @@ describe "search:update_supertypes" do let(:task_name) { "search:update_supertypes" } - it "updates supertypes for all indices" do + it "updates supertypes for govuk index" do allow(GovukIndex::Updater).to receive(:update) Rake::Task[task_name].invoke - index_names.each do |index_name| - expect(GovukIndex::Updater) - .to have_received(:update) - .with(index_name, GovukIndex::SupertypeJob) - end + expect(GovukIndex::Updater) + .to have_received(:update) + .with(govuk_index_name, GovukIndex::SupertypeJob) end end