diff --git a/.gitignore b/.gitignore index 73bd851ebbdd..67f5004899ee 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,8 @@ GEMINI.md # WANT TO ADD MORE? You can tell Git without adding to this file: # See https://git-scm.com/docs/gitignore # In particular, if you have tools you use, add to $GIT_DIR/info/exclude or use core.excludesFile + +# Azure Blob Storage testing artifacts (local testing only) +AzuriteConfig +__azurite_db_*.json +__blobstorage__/ diff --git a/changelog/unreleased/SOLR-17949-azure-blob-repository.yml b/changelog/unreleased/SOLR-17949-azure-blob-repository.yml new file mode 100644 index 000000000000..6ec39bd703dd --- /dev/null +++ b/changelog/unreleased/SOLR-17949-azure-blob-repository.yml @@ -0,0 +1,11 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Add Azure Blob Storage backup repository module +type: added +authors: + - name: Prateek Singhal +description: | + Added AzureBlobBackupRepository module for backing up and restoring Solr collections to Azure Blob Storage. + Supports multiple authentication methods: connection string, account name + key, SAS token, and Azure Identity (service principal, managed identity). 
+links: + - name: SOLR-17949 + url: https://issues.apache.org/jira/browse/SOLR-17949 diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index e6c402e8d9b9..5f9141d67355 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -49,6 +49,10 @@ asciidoctor-mathjax = "0.0.9" # @keep Asciidoctor tabs version used in ref-guide asciidoctor-tabs = "1.0.0-beta.6" azagniotov-langdetect = "12.5.2" +azure-core = "1.57.0" +azure-core-http-okhttp = "1.13.2" +azure-identity = "1.12.0" +azure-storage = "12.25.0" # @keep bats-assert (node) version used in packaging bats-assert = "2.0.0" # @keep bats-core (node) version used in packaging @@ -296,6 +300,11 @@ apache-zookeeper-zookeeper = { module = "org.apache.zookeeper:zookeeper", versio # @keep transitive dependency for version alignment apiguardian-api = { module = "org.apiguardian:apiguardian-api", version.ref = "apiguardian" } azagniotov-langdetect = { module = "io.github.azagniotov:language-detection", version.ref = "azagniotov-langdetect" } +azure-core = { module = "com.azure:azure-core", version.ref = "azure-core" } +azure-core-http-okhttp = { module = "com.azure:azure-core-http-okhttp", version.ref = "azure-core-http-okhttp" } +azure-identity = { module = "com.azure:azure-identity", version.ref = "azure-identity" } +azure-storage-blob = { module = "com.azure:azure-storage-blob", version.ref = "azure-storage" } +azure-storage-common = { module = "com.azure:azure-storage-common", version.ref = "azure-storage" } bc-jose4j = { module = "org.bitbucket.b_c:jose4j", version.ref = "bc-jose4j" } benmanes-caffeine = { module = "com.github.ben-manes.caffeine:caffeine", version.ref = "benmanes-caffeine" } bouncycastle-bcpkix = { module = "org.bouncycastle:bcpkix-jdk18on", version.ref = "bouncycastle" } diff --git a/settings.gradle b/settings.gradle index 782edec43251..ed296cde5ec0 100644 --- a/settings.gradle +++ b/settings.gradle @@ -45,6 +45,7 @@ include "solr:core" include "solr:cross-dc-manager" 
include "solr:server" include "solr:modules:analysis-extras" +include "solr:modules:azure-blob-repository" include "solr:modules:clustering" include "solr:modules:cross-dc" include "solr:modules:cuvs" diff --git a/solr/licenses/accessors-smart-2.5.0.jar.sha1 b/solr/licenses/accessors-smart-2.5.0.jar.sha1 new file mode 100644 index 000000000000..60d26d2d99fa --- /dev/null +++ b/solr/licenses/accessors-smart-2.5.0.jar.sha1 @@ -0,0 +1 @@ +aca011492dfe9c26f4e0659028a4fe0970829dd8 diff --git a/solr/licenses/azure-LICENSE-MIT.txt b/solr/licenses/azure-LICENSE-MIT.txt new file mode 100644 index 000000000000..b8b569d7746d --- /dev/null +++ b/solr/licenses/azure-LICENSE-MIT.txt @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Microsoft + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/solr/licenses/azure-core-1.57.0.jar.sha1 b/solr/licenses/azure-core-1.57.0.jar.sha1 new file mode 100644 index 000000000000..61da6e275e4e --- /dev/null +++ b/solr/licenses/azure-core-1.57.0.jar.sha1 @@ -0,0 +1 @@ +4fe5978491bb9a305b98dc5456a138ad7ba0f250 diff --git a/solr/licenses/azure-core-http-okhttp-1.13.2.jar.sha1 b/solr/licenses/azure-core-http-okhttp-1.13.2.jar.sha1 new file mode 100644 index 000000000000..c7a3ae4a128a --- /dev/null +++ b/solr/licenses/azure-core-http-okhttp-1.13.2.jar.sha1 @@ -0,0 +1 @@ +fd743d404300f134a2740c6d2ec8dbf9ebafcf04 diff --git a/solr/licenses/azure-identity-1.12.0.jar.sha1 b/solr/licenses/azure-identity-1.12.0.jar.sha1 new file mode 100644 index 000000000000..1dcd782fa8d0 --- /dev/null +++ b/solr/licenses/azure-identity-1.12.0.jar.sha1 @@ -0,0 +1 @@ +1d7efb089db2fe7a60526b8ff50b0c681fe1b079 diff --git a/solr/licenses/azure-json-1.5.0.jar.sha1 b/solr/licenses/azure-json-1.5.0.jar.sha1 new file mode 100644 index 000000000000..06c3f5e6cdc8 --- /dev/null +++ b/solr/licenses/azure-json-1.5.0.jar.sha1 @@ -0,0 +1 @@ +d12cf1a1d31ca75b27a5bbe0fbcf5ad73b7471b5 diff --git a/solr/licenses/azure-storage-blob-12.25.0.jar.sha1 b/solr/licenses/azure-storage-blob-12.25.0.jar.sha1 new file mode 100644 index 000000000000..1cfc20dfc28d --- /dev/null +++ b/solr/licenses/azure-storage-blob-12.25.0.jar.sha1 @@ -0,0 +1 @@ +94e0aed4a4cc8496d813e4432f840cb284b47ac5 diff --git a/solr/licenses/azure-storage-common-12.25.0.jar.sha1 b/solr/licenses/azure-storage-common-12.25.0.jar.sha1 new file mode 100644 index 000000000000..6aacac9e105e --- /dev/null +++ b/solr/licenses/azure-storage-common-12.25.0.jar.sha1 @@ -0,0 +1 @@ +4c2c2eebb4195fa186a26257572789dd31f86493 diff --git a/solr/licenses/azure-storage-internal-avro-12.10.0.jar.sha1 b/solr/licenses/azure-storage-internal-avro-12.10.0.jar.sha1 new file mode 100644 index 000000000000..3446b7706813 --- /dev/null +++ b/solr/licenses/azure-storage-internal-avro-12.10.0.jar.sha1 @@ -0,0 +1 @@ 
+8fe0d236b37610be22944a69332f79e880b7203f diff --git a/solr/licenses/azure-xml-1.2.0.jar.sha1 b/solr/licenses/azure-xml-1.2.0.jar.sha1 new file mode 100644 index 000000000000..75c0d7a6e8b9 --- /dev/null +++ b/solr/licenses/azure-xml-1.2.0.jar.sha1 @@ -0,0 +1 @@ +05a811882dc4eba119c7d1f0fc65acf39eaf417c diff --git a/solr/licenses/content-type-2.3.jar.sha1 b/solr/licenses/content-type-2.3.jar.sha1 new file mode 100644 index 000000000000..7718175e95f9 --- /dev/null +++ b/solr/licenses/content-type-2.3.jar.sha1 @@ -0,0 +1 @@ +e3aa0be212d7a42839a8f3f506f5b990bcce0222 diff --git a/solr/licenses/jna-platform-5.13.0.jar.sha1 b/solr/licenses/jna-platform-5.13.0.jar.sha1 new file mode 100644 index 000000000000..2c60ada13780 --- /dev/null +++ b/solr/licenses/jna-platform-5.13.0.jar.sha1 @@ -0,0 +1 @@ +88e9a306715e9379f3122415ef4ae759a352640d diff --git a/solr/licenses/json-smart-2.5.0.jar.sha1 b/solr/licenses/json-smart-2.5.0.jar.sha1 new file mode 100644 index 000000000000..2c839a3e5af1 --- /dev/null +++ b/solr/licenses/json-smart-2.5.0.jar.sha1 @@ -0,0 +1 @@ +57a64f421b472849c40e77d2e7cce3a141b41e99 diff --git a/solr/licenses/msal4j-1.15.0.jar.sha1 b/solr/licenses/msal4j-1.15.0.jar.sha1 new file mode 100644 index 000000000000..25d68664fd0b --- /dev/null +++ b/solr/licenses/msal4j-1.15.0.jar.sha1 @@ -0,0 +1 @@ +52fd60d5dc3f0fb3ed5c19b63f6f2312cd1f6add diff --git a/solr/licenses/msal4j-LICENSE-MIT.txt b/solr/licenses/msal4j-LICENSE-MIT.txt new file mode 100644 index 000000000000..ad22b888b221 --- /dev/null +++ b/solr/licenses/msal4j-LICENSE-MIT.txt @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) Microsoft Corporation. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/solr/licenses/msal4j-persistence-extension-1.3.0.jar.sha1 b/solr/licenses/msal4j-persistence-extension-1.3.0.jar.sha1 new file mode 100644 index 000000000000..0131bb7b2a04 --- /dev/null +++ b/solr/licenses/msal4j-persistence-extension-1.3.0.jar.sha1 @@ -0,0 +1 @@ +8a8ef1517d27a5b4de1512ef94679bdb59f210b6 diff --git a/solr/licenses/oauth2-oidc-sdk-11.9.1.jar.sha1 b/solr/licenses/oauth2-oidc-sdk-11.9.1.jar.sha1 new file mode 100644 index 000000000000..3d7d85862600 --- /dev/null +++ b/solr/licenses/oauth2-oidc-sdk-11.9.1.jar.sha1 @@ -0,0 +1 @@ +fa9a2e447e2cef4dfda40a854dd7ec35624a7799 diff --git a/solr/licenses/reactor-LICENSE-ASL.txt b/solr/licenses/reactor-LICENSE-ASL.txt new file mode 100644 index 000000000000..1eef70a9b9f4 --- /dev/null +++ b/solr/licenses/reactor-LICENSE-ASL.txt @@ -0,0 +1,206 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND 
DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Note: Other license terms may apply to certain, identified software files contained within or distributed + with the accompanying software if such terms are included in the directory containing the accompanying software. + Such other license terms will then apply in lieu of the terms of the software license above. diff --git a/solr/licenses/reactor-NOTICE.txt b/solr/licenses/reactor-NOTICE.txt new file mode 100644 index 000000000000..990ac4433824 --- /dev/null +++ b/solr/licenses/reactor-NOTICE.txt @@ -0,0 +1,7 @@ +Project Reactor +Copyright (c) 2011-2024 VMware Inc. or its affiliates, All Rights Reserved. + +This product includes software developed at +VMware Inc. 
(https://github.com/reactor) + +Licensed under the Apache License 2.0 diff --git a/solr/licenses/reactor-core-3.7.11.jar.sha1 b/solr/licenses/reactor-core-3.7.11.jar.sha1 new file mode 100644 index 000000000000..cae3d145d817 --- /dev/null +++ b/solr/licenses/reactor-core-3.7.11.jar.sha1 @@ -0,0 +1 @@ +8ac8ee9da2424c81c029f8c361e34838f77a1b78 diff --git a/solr/modules/azure-blob-repository/README.md b/solr/modules/azure-blob-repository/README.md new file mode 100644 index 000000000000..1a4e0accca71 --- /dev/null +++ b/solr/modules/azure-blob-repository/README.md @@ -0,0 +1,101 @@ + + +# Apache Solr Azure Blob Storage Backup Repository + +A backup repository implementation for storing Solr backups in Azure Blob Storage. + +## Prerequisites + +- Azure Storage Account with a blob container (must already exist) +- Network access to Azure Blob Storage (HTTPS port 443) + +Enable the module: +```bash +export SOLR_MODULES=azure-blob-repository +``` + +## Configuration + +Add to `solr.xml`: + +```xml + + + YOUR_CONTAINER_NAME + + + +``` + +## Authentication Methods + +### Connection String (Development) + +```xml +DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net +``` + +### SAS Token (Production) + +Generate a SAS token with permissions: Read, Write, Delete, List, Add, Create (`sp=rwdlac`) and resource types: Service, Container, Object (`srt=sco`). + +```xml +https://YOUR_ACCOUNT.blob.core.windows.net +sv=2024-11-04&ss=b&srt=sco&sp=rwdlac&... +``` + +Note: Escape `&` as `&amp;` in XML. + +### Azure Identity (Production - Recommended) + +Uses Azure AD authentication. Requires "Storage Blob Data Contributor" role on the storage account. + +```xml +https://YOUR_ACCOUNT.blob.core.windows.net + +``` + +For Service Principal, add: +```xml +YOUR_TENANT_ID +YOUR_CLIENT_ID +YOUR_CLIENT_SECRET +``` + +Or set environment variables: `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET`. 
+ +## Usage + +```bash +# Backup +curl "http://localhost:8983/solr/admin/collections?action=BACKUP&name=my-backup&collection=my-collection&repository=azure_blob&location=/" + +# Restore +curl "http://localhost:8983/solr/admin/collections?action=RESTORE&name=my-backup&collection=my-collection&repository=azure_blob&location=/" + +# List backups +curl "http://localhost:8983/solr/admin/collections?action=LISTBACKUP&name=my-backup&repository=azure_blob&location=/" +``` + +## Troubleshooting + +**403 Forbidden**: Check SAS token permissions (`srt=sco`, `sp=rwdlac`) or RBAC role assignment. + +**Signature did not match**: Ensure `&` is escaped as `&amp;` in XML and no whitespace in token. + +**DefaultAzureCredential failed**: Run `az login` or verify service principal credentials. diff --git a/solr/modules/azure-blob-repository/build.gradle b/solr/modules/azure-blob-repository/build.gradle new file mode 100644 index 000000000000..62f40b16f331 --- /dev/null +++ b/solr/modules/azure-blob-repository/build.gradle @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +apply plugin: 'java-library' + +description = 'Azure Blob Storage Repository' + +ext { + // Disable security manager for azure-blob-repository module tests + // Required because Testcontainers needs access to Docker socket and system properties + useSecurityManager = false +} + +dependencies { + implementation platform(project(':platform')) + api(project(':solr:core')) + implementation project(':solr:solrj') + + implementation libs.apache.lucene.core + + // Azure Storage SDK dependencies + implementation(libs.azure.storage.blob) { + exclude group: 'com.azure', module: 'azure-core-http-netty' + } + implementation(libs.azure.identity) { + exclude group: 'com.azure', module: 'azure-core-http-netty' + } + implementation(libs.azure.core) { + exclude group: 'com.azure', module: 'azure-core-http-netty' + } + implementation libs.azure.core.http.okhttp + implementation(libs.azure.storage.common) { + exclude group: 'com.azure', module: 'azure-core-http-netty' + } + + implementation libs.google.guava + implementation libs.slf4j.api + + runtimeOnly libs.fasterxml.woodstox.core + runtimeOnly libs.codehaus.woodstox.stax2api + + testImplementation project(':solr:test-framework') + testImplementation libs.junit.junit + testImplementation libs.commonsio.commonsio + + // OkHttp for test client management + testImplementation libs.azure.core.http.okhttp + + // Testcontainers for Azurite integration testing + testImplementation libs.testcontainers + + // Explicit transitive test dependencies for dependency analyzer + testImplementation libs.carrotsearch.randomizedtesting.runner + testImplementation libs.apache.lucene.testframework +} \ No newline at end of file diff --git a/solr/modules/azure-blob-repository/gradle.lockfile b/solr/modules/azure-blob-repository/gradle.lockfile new file mode 100644 index 000000000000..01e50d22eb64 --- /dev/null +++ b/solr/modules/azure-blob-repository/gradle.lockfile @@ -0,0 +1,207 @@ +# This is a Gradle generated file for dependency locking. 
+# Manual edits can break the build and are not advised. +# This file is expected to be part of source control. +com.azure:azure-core-http-okhttp:1.13.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-core:1.57.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-identity:1.12.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-json:1.5.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-storage-blob:12.25.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-storage-common:12.25.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-storage-internal-avro:12.10.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.azure:azure-xml:1.2.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.carrotsearch.randomizedtesting:randomizedtesting-runner:2.8.4=jarValidation,testCompileClasspath,testRuntimeClasspath +com.carrotsearch:hppc:0.10.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.fasterxml.jackson.core:jackson-annotations:2.21=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.jackson.core:jackson-core:2.21.2=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.jackson.core:jackson-databind:2.21.2=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath 
+com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.21.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.21.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.21.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.21.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.jackson.module:jackson-module-jakarta-xmlbind-annotations:2.21.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.fasterxml.jackson:jackson-bom:2.21.2=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +com.fasterxml.woodstox:woodstox-core:7.0.0=apiHelper +com.fasterxml.woodstox:woodstox-core:7.1.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +com.github.ben-manes.caffeine:caffeine:3.2.3=annotationProcessor,apiHelper,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testRuntimeClasspath +com.github.docker-java:docker-java-api:3.7.0=jarValidation,testCompileClasspath,testRuntimeClasspath +com.github.docker-java:docker-java-transport-zerodep:3.7.0=jarValidation,testCompileClasspath,testRuntimeClasspath +com.github.docker-java:docker-java-transport:3.7.0=jarValidation,testCompileClasspath,testRuntimeClasspath +com.github.kevinstern:software-and-algorithms:1.0=annotationProcessor,errorprone,testAnnotationProcessor +com.github.stephenc.jcip:jcip-annotations:1.0-1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath 
+com.google.auto.service:auto-service-annotations:1.0.1=annotationProcessor,errorprone,testAnnotationProcessor +com.google.auto.value:auto-value-annotations:1.11.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.auto:auto-common:1.2.2=annotationProcessor,errorprone,testAnnotationProcessor +com.google.errorprone:error_prone_annotation:2.41.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.errorprone:error_prone_annotations:2.41.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +com.google.errorprone:error_prone_annotations:2.43.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.errorprone:error_prone_check_api:2.41.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.errorprone:error_prone_core:2.41.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.googlejavaformat:google-java-format:1.27.0=annotationProcessor,errorprone,testAnnotationProcessor +com.google.guava:failureaccess:1.0.3=annotationProcessor,apiHelper,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath +com.google.guava:guava:33.5.0-jre=annotationProcessor,apiHelper,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath +com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava=annotationProcessor,apiHelper,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath +com.google.j2objc:j2objc-annotations:3.1=annotationProcessor,apiHelper,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath 
+com.google.protobuf:protobuf-java:3.25.8=annotationProcessor,errorprone,testAnnotationProcessor +com.j256.simplemagic:simplemagic:1.17=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.jayway.jsonpath:json-path:2.9.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +com.lmax:disruptor:4.0.0=solrPlatformLibs +com.microsoft.azure:msal4j-persistence-extension:1.3.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.microsoft.azure:msal4j:1.15.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.nimbusds:content-type:2.3=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.nimbusds:lang-tag:1.7=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.nimbusds:nimbus-jose-jwt:10.5=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.nimbusds:oauth2-oidc-sdk:11.9.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.squareup.okhttp3:okhttp:4.12.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.squareup.okio:okio-jvm:3.16.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +com.tdunning:t-digest:3.3=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +commons-cli:commons-cli:1.11.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +commons-codec:commons-codec:1.21.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath 
+commons-io:commons-io:2.21.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.dropwizard.metrics:metrics-annotation:4.2.33=jarValidation,testRuntimeClasspath +io.dropwizard.metrics:metrics-core:4.2.33=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.dropwizard.metrics:metrics-jetty12-ee10:4.2.33=jarValidation,testRuntimeClasspath +io.dropwizard.metrics:metrics-jetty12:4.2.33=jarValidation,testRuntimeClasspath +io.github.eisop:dataflow-errorprone:3.41.0-eisop1=annotationProcessor,errorprone,testAnnotationProcessor +io.github.java-diff-utils:java-diff-utils:4.12=annotationProcessor,errorprone,testAnnotationProcessor +io.netty:netty-buffer:4.2.12.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-codec-base:4.2.12.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-common:4.2.12.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-handler:4.2.12.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-resolver:4.2.12.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-tcnative-boringssl-static:2.0.75.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-tcnative-classes:2.0.75.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath 
+io.netty:netty-transport-classes-epoll:4.2.12.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-transport-native-epoll:4.2.12.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-transport-native-unix-common:4.2.12.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.netty:netty-transport:4.2.12.Final=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.opentelemetry.instrumentation:opentelemetry-instrumentation-api-incubator:2.22.0-alpha=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry.instrumentation:opentelemetry-instrumentation-api:2.22.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry.instrumentation:opentelemetry-runtime-telemetry-java17:2.22.0-alpha=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry.instrumentation:opentelemetry-runtime-telemetry-java8:2.22.0-alpha=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry.semconv:opentelemetry-semconv:1.37.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-api-incubator:1.56.0-alpha=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-api:1.56.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath 
+io.opentelemetry:opentelemetry-common:1.56.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.opentelemetry:opentelemetry-context:1.56.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +io.opentelemetry:opentelemetry-exporter-prometheus:1.56.0-alpha=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-sdk-common:1.56.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-sdk-metrics:1.56.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-sdk-trace:1.56.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.opentelemetry:opentelemetry-sdk:1.56.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.projectreactor:reactor-core:3.7.11=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +io.prometheus:prometheus-metrics-exposition-formats:1.1.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.prometheus:prometheus-metrics-model:1.1.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.sgr:s2-geometry-library-java:1.0.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +io.swagger.core.v3:swagger-annotations-jakarta:2.2.22=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +jakarta.activation:jakarta.activation-api:2.1.3=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
+jakarta.annotation:jakarta.annotation-api:3.0.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.inject:jakarta.inject-api:2.0.1=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.servlet:jakarta.servlet-api:6.1.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.validation:jakarta.validation-api:3.1.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.ws.rs:jakarta.ws.rs-api:4.0.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +jakarta.xml.bind:jakarta.xml.bind-api:4.0.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +javax.inject:javax.inject:1=annotationProcessor,errorprone,testAnnotationProcessor +junit:junit:4.13.2=jarValidation,testCompileClasspath,testRuntimeClasspath +net.java.dev.jna:jna-platform:5.13.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +net.java.dev.jna:jna:5.18.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +net.minidev:accessors-smart:2.5.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +net.minidev:json-smart:2.5.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.antlr:antlr4-runtime:4.13.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.commons:commons-compress:1.28.0=jarValidation,testCompileClasspath,testRuntimeClasspath +org.apache.commons:commons-exec:1.6.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
+org.apache.commons:commons-lang3:3.20.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.commons:commons-math3:3.6.1=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.curator:curator-client:5.9.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.curator:curator-framework:5.9.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.curator:curator-test:5.9.0=jarValidation,testRuntimeClasspath +org.apache.httpcomponents:httpclient:4.5.14=jarValidation,testRuntimeClasspath +org.apache.httpcomponents:httpcore:4.4.16=jarValidation,testRuntimeClasspath +org.apache.httpcomponents:httpmime:4.5.14=jarValidation,testRuntimeClasspath +org.apache.logging.log4j:log4j-1.2-api:2.25.3=solrPlatformLibs +org.apache.logging.log4j:log4j-api:2.25.3=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.logging.log4j:log4j-core:2.25.3=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.logging.log4j:log4j-layout-template-json:2.25.3=solrPlatformLibs +org.apache.logging.log4j:log4j-slf4j2-impl:2.25.3=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.logging.log4j:log4j-web:2.25.3=solrPlatformLibs +org.apache.lucene:lucene-analysis-common:10.4.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.lucene:lucene-analysis-kuromoji:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
+org.apache.lucene:lucene-analysis-nori:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-analysis-phonetic:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-backward-codecs:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-classification:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-codecs:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-core:10.4.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.lucene:lucene-expressions:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-facet:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-grouping:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-highlighter:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-join:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-memory:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-misc:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-queries:10.4.0=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath 
+org.apache.lucene:lucene-queryparser:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-sandbox:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-spatial-extras:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-spatial3d:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-suggest:10.4.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.apache.lucene:lucene-test-framework:10.4.0=jarValidation,testCompileClasspath,testRuntimeClasspath +org.apache.zookeeper:zookeeper-jute:3.9.4=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apache.zookeeper:zookeeper:3.9.4=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.apiguardian:apiguardian-api:1.1.2=jarValidation,testRuntimeClasspath +org.codehaus.woodstox:stax2-api:4.2.2=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty.ee10:jetty-ee10-servlet:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty.http2:jetty-http2-client-transport:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty.http2:jetty-http2-client:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty.http2:jetty-http2-common:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath 
+org.eclipse.jetty.http2:jetty-http2-hpack:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty.http2:jetty-http2-server:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty:jetty-alpn-client:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty:jetty-alpn-java-client:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty:jetty-alpn-java-server:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty:jetty-alpn-server:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty:jetty-client:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty:jetty-http:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty:jetty-io:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.eclipse.jetty:jetty-rewrite:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty:jetty-security:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty:jetty-server:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.eclipse.jetty:jetty-session:12.0.34=jarValidation,testRuntimeClasspath +org.eclipse.jetty:jetty-util:12.0.34=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.glassfish.hk2.external:aopalliance-repackaged:4.0.0-M3=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.hk2:hk2-api:4.0.0-M3=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
+org.glassfish.hk2:hk2-locator:4.0.0-M3=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.hk2:hk2-utils:4.0.0-M3=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.hk2:osgi-resource-locator:3.0.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.containers:jersey-container-jetty-http:4.0.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.core:jersey-client:4.0.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.core:jersey-common:4.0.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.core:jersey-server:4.0.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.ext:jersey-entity-filtering:4.0.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.inject:jersey-hk2:4.0.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey.media:jersey-media-json-jackson:4.0.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.glassfish.jersey:jersey-bom:4.0.2=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.hamcrest:hamcrest:3.0=jarValidation,testCompileClasspath,testRuntimeClasspath +org.javassist:javassist:3.30.2-GA=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.jetbrains.kotlin:kotlin-stdlib-jdk7:2.3.20=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath 
+org.jetbrains.kotlin:kotlin-stdlib-jdk8:2.3.20=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.jetbrains.kotlin:kotlin-stdlib:2.3.20=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.jetbrains:annotations:26.0.2=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.jspecify:jspecify:1.0.0=annotationProcessor,apiHelper,compileClasspath,errorprone,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testAnnotationProcessor,testCompileClasspath,testRuntimeClasspath +org.junit.jupiter:junit-jupiter-api:5.6.2=jarValidation,testRuntimeClasspath +org.junit.platform:junit-platform-commons:1.6.2=jarValidation,testRuntimeClasspath +org.junit:junit-bom:5.6.2=jarValidation,testRuntimeClasspath +org.locationtech.spatial4j:spatial4j:0.8=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.opentest4j:opentest4j:1.2.0=jarValidation,testRuntimeClasspath +org.ow2.asm:asm-commons:9.8=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.ow2.asm:asm-tree:9.8=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.ow2.asm:asm:9.8=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.pcollections:pcollections:4.0.1=annotationProcessor,errorprone,testAnnotationProcessor +org.reactivestreams:reactive-streams:1.0.4=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +org.rnorth.duct-tape:duct-tape:1.0.8=jarValidation,testCompileClasspath,testRuntimeClasspath +org.semver4j:semver4j:6.0.0=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath 
+org.slf4j:jcl-over-slf4j:2.0.17=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +org.slf4j:jul-to-slf4j:2.0.17=solrPlatformLibs +org.slf4j:slf4j-api:2.0.17=apiHelper,compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +org.testcontainers:testcontainers:2.0.3=jarValidation,testCompileClasspath,testRuntimeClasspath +org.xerial.snappy:snappy-java:1.1.10.8=apiHelper,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath +empty=apiHelperTest,compileOnlyHelper,compileOnlyHelperTest,missingdoclet,packaging,permitAggregatorUse,permitTestAggregatorUse,permitTestUnusedDeclared,permitTestUsedUndeclared,permitUnusedDeclared,permitUsedUndeclared,signatures diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepository.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepository.java new file mode 100644 index 000000000000..50c8b63988af --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepository.java @@ -0,0 +1,396 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import com.google.common.annotations.VisibleForTesting; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.invoke.MethodHandles; +import java.net.URI; +import java.net.URISyntaxException; +import java.time.Duration; +import java.time.Instant; +import java.util.Collection; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.backup.repository.AbstractBackupRepository; +import org.apache.solr.core.backup.repository.BackupRepository; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A concrete implementation of {@link BackupRepository} interface supporting backup/restore of Solr + * indexes to Azure Blob Storage. 
+ */ +public class AzureBlobBackupRepository extends AbstractBackupRepository { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + static final String BLOB_SCHEME = "blob"; + private static final int CHUNK_SIZE = 16 * 1024 * 1024; + private static final int COPY_BUFFER_SIZE = 8192; + + private AzureBlobStorageClient client; + + @Override + public void init(NamedList args) { + super.init(args); + AzureBlobBackupRepositoryConfig backupConfig = new AzureBlobBackupRepositoryConfig(this.config); + + if (client != null) { + client.close(); + } + + this.client = backupConfig.buildClient(); + } + + @VisibleForTesting + public void setClient(AzureBlobStorageClient client) { + this.client = client; + } + + @Override + @SuppressWarnings("unchecked") + public T getConfigProperty(String name) { + return (T) this.config.get(name); + } + + @Override + public URI createURI(String location) { + if (StrUtils.isNullOrEmpty(location)) { + throw new IllegalArgumentException("cannot create URI with an empty location"); + } + + URI result; + try { + if (location.startsWith(BLOB_SCHEME + ":")) { + result = new URI(location); + } else if (location.startsWith("/")) { + result = new URI(BLOB_SCHEME, "", location, null); + } else { + result = new URI(BLOB_SCHEME, "", "/" + location, null); + } + return result; + } catch (URISyntaxException ex) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ex); + } + } + + @Override + public URI createDirectoryURI(String location) { + if (StrUtils.isNullOrEmpty(location)) { + throw new IllegalArgumentException("cannot create URI with an empty location"); + } + + if (!location.endsWith("/")) { + location += "/"; + } + + return createURI(location); + } + + @Override + public URI resolve(URI baseUri, String... 
pathComponents) { + if (!BLOB_SCHEME.equalsIgnoreCase(baseUri.getScheme())) { + throw new IllegalArgumentException("URI must begin with 'blob:' scheme"); + } + + String path = baseUri + "/" + String.join("/", pathComponents); + return URI.create(path).normalize(); + } + + @Override + public URI resolveDirectory(URI baseUri, String... pathComponents) { + if (pathComponents.length > 0) { + if (!pathComponents[pathComponents.length - 1].endsWith("/")) { + pathComponents[pathComponents.length - 1] = pathComponents[pathComponents.length - 1] + "/"; + } + } else { + if (!baseUri.toString().endsWith("/")) { + baseUri = URI.create(baseUri + "/"); + } + } + return resolve(baseUri, pathComponents); + } + + @Override + public void createDirectory(URI path) throws IOException { + Objects.requireNonNull(path, "cannot create directory to a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Create directory '{}'", blobPath); + } + + try { + client.createDirectory(blobPath); + } catch (AzureBlobException e) { + throw new IOException("Failed to create directory " + blobPath, e); + } + } + + @Override + public void deleteDirectory(URI path) throws IOException { + Objects.requireNonNull(path, "cannot delete directory with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Delete directory '{}'", blobPath); + } + + try { + client.deleteDirectory(blobPath); + } catch (AzureBlobException e) { + throw new IOException("Failed to delete directory " + blobPath, e); + } + } + + @Override + public void delete(URI path, Collection files) throws IOException { + Objects.requireNonNull(path, "cannot delete with a null URI"); + Objects.requireNonNull(files, "cannot delete with a null files collection"); + + String basePath = getBlobPath(path); + + try { + if (!client.isDirectory(basePath)) { + int lastSlash = basePath.lastIndexOf('/'); + basePath = lastSlash >= 0 ? 
basePath.substring(0, lastSlash) : ""; + } + } catch (AzureBlobException e) { + throw new IOException("Failed to check path type for " + basePath, e); + } + + final String baseForPaths = basePath; + Set fullPaths = + files.stream() + .map(file -> (baseForPaths.isEmpty() ? file : baseForPaths + "/" + file)) + .collect(Collectors.toSet()); + + if (log.isDebugEnabled()) { + log.debug("Delete files '{}'", fullPaths); + } + + try { + client.delete(fullPaths); + } catch (AzureBlobException e) { + throw new IOException("Failed to delete files " + fullPaths, e); + } + } + + @Override + public boolean exists(URI path) throws IOException { + Objects.requireNonNull(path, "cannot check existence with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Check existence '{}'", blobPath); + } + + try { + return client.pathExists(blobPath); + } catch (AzureBlobException e) { + throw new IOException("Failed to check existence of " + blobPath, e); + } + } + + @Override + public PathType getPathType(URI path) throws IOException { + Objects.requireNonNull(path, "cannot get path type with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Get path type '{}'", blobPath); + } + + try { + if (client.isDirectory(blobPath)) { + return BackupRepository.PathType.DIRECTORY; + } else { + return BackupRepository.PathType.FILE; + } + } catch (AzureBlobException e) { + throw new IOException("Failed to get path type for " + blobPath, e); + } + } + + @Override + public String[] listAll(URI path) throws IOException { + Objects.requireNonNull(path, "cannot list with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("List all '{}'", blobPath); + } + + try { + return client.listDir(blobPath); + } catch (AzureBlobException e) { + throw new IOException("Failed to list directory " + blobPath, e); + } + } + + @Override + public IndexInput openInput(URI dirPath, 
String fileName, IOContext ctx) throws IOException { + Objects.requireNonNull(dirPath, "cannot open input with a null URI"); + Objects.requireNonNull(fileName, "cannot open input with a null fileName"); + + String base = getBlobPath(dirPath); + String blobPath = base.endsWith("/") ? base + fileName : base + "/" + fileName; + + if (log.isDebugEnabled()) { + log.debug("Open input '{}'", blobPath); + } + + try { + return new AzureBlobIndexInput(blobPath, client, client.length(blobPath)); + } catch (AzureBlobException e) { + throw new IOException("Failed to open input stream for " + blobPath, e); + } + } + + @Override + public OutputStream createOutput(URI path) throws IOException { + Objects.requireNonNull(path, "cannot create output with a null URI"); + + String blobPath = getBlobPath(path); + + if (log.isDebugEnabled()) { + log.debug("Create output '{}'", blobPath); + } + + try { + return client.pushStream(blobPath); + } catch (AzureBlobException e) { + throw new IOException("Failed to create output stream for " + blobPath, e); + } + } + + @Override + public void copyIndexFileFrom( + Directory sourceDir, String sourceFileName, URI dest, String destFileName) + throws IOException { + Objects.requireNonNull(sourceDir, "cannot copy with a null sourceDir"); + Objects.requireNonNull(sourceFileName, "cannot copy with a null sourceFileName"); + Objects.requireNonNull(dest, "cannot copy with a null dest"); + + String destPath = getBlobPath(dest); + + String blobPath = destPath.endsWith("/") ? destPath + destFileName : destPath; + + if (log.isDebugEnabled()) { + log.debug("Copy index file from '{}' to '{}'", sourceFileName, blobPath); + } + + String parentDir = + blobPath.contains("/") ? 
blobPath.substring(0, blobPath.lastIndexOf('/') + 1) : ""; + try { + if (!parentDir.isEmpty()) { + client.createDirectory(parentDir); + } + } catch (AzureBlobException e) { + // ignore; write will surface real issues + } + + try (IndexInput input = sourceDir.openInput(sourceFileName, IOContext.DEFAULT); + OutputStream output = client.pushStream(blobPath)) { + byte[] buffer = new byte[COPY_BUFFER_SIZE]; + long remaining = input.length(); + while (remaining > 0) { + int toRead = (int) Math.min(buffer.length, remaining); + input.readBytes(buffer, 0, toRead); + output.write(buffer, 0, toRead); + remaining -= toRead; + } + } catch (AzureBlobException e) { + throw new IOException("Failed to copy file from " + sourceFileName + " to " + blobPath, e); + } + } + + @Override + public void copyIndexFileTo( + URI sourceDir, String sourceFileName, Directory dest, String destFileName) + throws IOException { + if (StrUtils.isNullOrEmpty(sourceFileName)) { + throw new IllegalArgumentException("must have a valid source file name to copy"); + } + if (StrUtils.isNullOrEmpty(destFileName)) { + throw new IllegalArgumentException("must have a valid destination file name to copy"); + } + + String basePath = getBlobPath(sourceDir); + String blobPath; + if (basePath.endsWith("/" + sourceFileName) + || basePath.equals(sourceFileName) + || basePath.equals("/" + sourceFileName)) { + blobPath = basePath; + } else { + URI filePath = resolve(sourceDir, sourceFileName); + blobPath = getBlobPath(filePath); + } + + Instant start = Instant.now(); + if (log.isDebugEnabled()) { + log.debug("Download started from blob '{}'", blobPath); + } + + try (InputStream inputStream = client.pullStream(blobPath); + IndexOutput indexOutput = dest.createOutput(destFileName, IOContext.DEFAULT)) { + byte[] buffer = new byte[CHUNK_SIZE]; + int len; + while ((len = inputStream.read(buffer)) != -1) { + indexOutput.writeBytes(buffer, 0, len); + } + } catch (AzureBlobException e) { + throw new IOException("Failed to copy 
file from " + blobPath + " to " + destFileName, e); + } + + long timeElapsed = Duration.between(start, Instant.now()).toMillis(); + + if (log.isInfoEnabled()) { + log.info("Download from Azure Blob Storage '{}' finished in {}ms", blobPath, timeElapsed); + } + } + + @Override + public void close() throws IOException { + if (client != null) { + client.close(); + } + } + + private String getBlobPath(URI uri) { + if (!BLOB_SCHEME.equalsIgnoreCase(uri.getScheme())) { + throw new IllegalArgumentException("URI must begin with 'blob:' scheme"); + } + return uri.getPath(); + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepositoryConfig.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepositoryConfig.java new file mode 100644 index 000000000000..f0f8f9c1f4c7 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobBackupRepositoryConfig.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import org.apache.solr.common.util.EnvUtils; +import org.apache.solr.common.util.NamedList; + +public class AzureBlobBackupRepositoryConfig { + + public static final String CONTAINER_NAME = "azure.blob.container.name"; + public static final String CONNECTION_STRING = "azure.blob.connection.string"; + public static final String ENDPOINT = "azure.blob.endpoint"; + public static final String ACCOUNT_NAME = "azure.blob.account.name"; + public static final String ACCOUNT_KEY = "azure.blob.account.key"; + public static final String SAS_TOKEN = "azure.blob.sas.token"; + public static final String TENANT_ID = "azure.blob.tenant.id"; + public static final String CLIENT_ID = "azure.blob.client.id"; + public static final String CLIENT_SECRET = "azure.blob.client.secret"; + + private final String containerName; + private final String connectionString; + private final String endpoint; + private final String accountName; + private final String accountKey; + private final String sasToken; + private final String tenantId; + private final String clientId; + private final String clientSecret; + + public AzureBlobBackupRepositoryConfig(NamedList config) { + containerName = getStringConfig(config, CONTAINER_NAME); + connectionString = getStringConfig(config, CONNECTION_STRING); + endpoint = getStringConfig(config, ENDPOINT); + accountName = getStringConfig(config, ACCOUNT_NAME); + accountKey = getStringConfig(config, ACCOUNT_KEY); + sasToken = getStringConfig(config, SAS_TOKEN); + tenantId = getStringConfig(config, TENANT_ID); + clientId = getStringConfig(config, CLIENT_ID); + clientSecret = getStringConfig(config, CLIENT_SECRET); + } + + public AzureBlobStorageClient buildClient() { + return new AzureBlobStorageClient( + containerName, + connectionString, + endpoint, + accountName, + accountKey, + sasToken, + tenantId, + clientId, + clientSecret); + } + + static String getStringConfig(NamedList config, String property) { + String envProp = 
EnvUtils.getProperty(property); + if (envProp == null) { + Object configProp = config.get(property); + return configProp == null ? null : configProp.toString(); + } else { + return envProp; + } + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobException.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobException.java new file mode 100644 index 000000000000..f32700351fab --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobException.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +/** + * Generic exception for Blob Storage related failures. Could originate from the {@link + * AzureBlobBackupRepository} or from its underlying {@link AzureBlobStorageClient}. 
+ */ +public class AzureBlobException extends Exception { + public AzureBlobException(String message) { + super(message); + } + + public AzureBlobException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobIndexInput.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobIndexInput.java new file mode 100644 index 000000000000..c523307e4f2e --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobIndexInput.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.util.LinkedHashMap; +import java.util.Map; +import org.apache.lucene.store.IndexInput; + +class AzureBlobIndexInput extends IndexInput { + + private static final int MIN_PAGE_SIZE = 4 * 1024; + private static final int DEFAULT_PAGE_SIZE = 512 * 1024; + private static final int MAX_CACHED_PAGES = 128; + + private final String path; + private final AzureBlobStorageClient client; + private final long length; + private final int pageSize; + private final LruPageCache cache; + + private long position = 0L; + private boolean closed = false; + + AzureBlobIndexInput(String path, AzureBlobStorageClient client, long length) { + this(path, client, length, DEFAULT_PAGE_SIZE, MAX_CACHED_PAGES); + } + + AzureBlobIndexInput( + String path, AzureBlobStorageClient client, long length, int pageSize, int maxCachedPages) { + super(path); + this.path = path; + this.client = client; + this.length = length; + this.pageSize = Math.max(MIN_PAGE_SIZE, pageSize); + this.cache = new LruPageCache(maxCachedPages); + } + + @Override + public void close() throws IOException { + closed = true; + cache.clear(); + } + + @Override + public long getFilePointer() { + return position; + } + + @Override + public void seek(long pos) throws IOException { + ensureOpen(); + if (pos < 0 || pos > length) { + throw new IOException("Seek position out of bounds: " + pos); + } + + position = pos; + } + + @Override + public long length() { + return length; + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + ensureOpen(); + if (offset < 0 || length < 0 || offset + length > this.length) { + throw new IOException("Slice out of bounds: offset=" + offset + ", length=" + length); + } + + AzureBlobIndexInput slice = + new AzureBlobIndexInput( + getFullSliceDescription(sliceDescription), client, length, pageSize, 
MAX_CACHED_PAGES); + + slice.position = 0L; + + // Wrap client in a view that remaps range requests by adding base offset + slice.clientViewBaseOffset = this.clientViewBaseOffset + offset; + return slice; + } + + @Override + public byte readByte() throws IOException { + ensureOpen(); + if (position >= length) { + throw new EOFException("End of stream reached"); + } + + byte[] page = getPage(pageIndex(position)); + int inPageOffset = (int) (position % pageSize); + byte value = page[inPageOffset]; + position += 1L; + return value; + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + ensureOpen(); + if (len < 0) { + throw new IOException("Length must be non-negative"); + } + + if (position + len > length) { + throw new EOFException("End of stream reached"); + } + + int remaining = len; + while (remaining > 0) { + long pageIdx = pageIndex(position); + byte[] page = getPage(pageIdx); + int inPageOffset = (int) (position % pageSize); + int toCopy = Math.min(remaining, pageSize - inPageOffset); + System.arraycopy(page, inPageOffset, b, offset + (len - remaining), toCopy); + position += toCopy; + remaining -= toCopy; + } + } + + // Internal state for slices: base offset to add to all range requests + private long clientViewBaseOffset = 0L; + + private byte[] getPage(long pageIdx) throws IOException { + byte[] page = cache.get(pageIdx); + if (page != null) { + return page; + } + + long absoluteOffset = clientViewBaseOffset + pageIdx * (long) pageSize; + int bytesToRead = (int) Math.min(pageSize, length - pageIdx * (long) pageSize); + if (bytesToRead <= 0) { + throw new EOFException("End of stream reached"); + } + + page = new byte[bytesToRead]; + try (InputStream in = client.pullRangeStream(path, absoluteOffset, bytesToRead)) { + int readTotal = 0; + while (readTotal < bytesToRead) { + int read = in.read(page, readTotal, bytesToRead - readTotal); + if (read == -1) break; + readTotal += read; + } + + if (readTotal < bytesToRead) { + 
throw new EOFException( + "End of stream reached: expected " + bytesToRead + " bytes, got " + readTotal); + } + } catch (AzureBlobException e) { + throw new IOException("Failed to fetch range page", e); + } + + cache.put(pageIdx, page); + return page; + } + + private long pageIndex(long pos) { + return pos / pageSize; + } + + private void ensureOpen() throws IOException { + if (closed) { + throw new IOException("IndexInput is closed"); + } + } + + private static final class LruPageCache extends LinkedHashMap<Long, byte[]> { + private final int maxEntries; + + LruPageCache(int maxEntries) { + super(16, 0.75f, true); + this.maxEntries = maxEntries; + } + + @Override + protected boolean removeEldestEntry(Map.Entry<Long, byte[]> eldest) { + return size() > maxEntries; + } + + @Override + public void clear() { + super.clear(); + } + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobNotFoundException.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobNotFoundException.java new file mode 100644 index 000000000000..a6f5253c0e3f --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobNotFoundException.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +/** Exception thrown when a blob is not found in Azure Blob Storage. */ +public class AzureBlobNotFoundException extends AzureBlobException { + public AzureBlobNotFoundException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobOutputStream.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobOutputStream.java new file mode 100644 index 000000000000..d48fc472a7e7 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobOutputStream.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.specialized.BlockBlobClient; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.lang.invoke.MethodHandles; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.UUID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * OutputStream implementation for Azure Blob Storage using block blobs. Supports chunked uploads + * for large files. + */ +public class AzureBlobOutputStream extends OutputStream { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + static final int BLOCK_SIZE = 4 * 1024 * 1024; + + private final BlobClient blobClient; + private final String blobPath; + private volatile boolean closed; + private final ByteBuffer buffer; + private BlockUpload blockUpload; + private boolean committed; + + public AzureBlobOutputStream(BlobClient blobClient, String blobPath) { + this.blobClient = blobClient; + this.blobPath = blobPath; + this.closed = false; + this.buffer = ByteBuffer.allocate(BLOCK_SIZE); + this.blockUpload = null; + this.committed = false; + + if (log.isDebugEnabled()) { + log.debug("Created BlobOutputStream for blobPath '{}'", blobPath); + } + } + + @Override + public void write(int b) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + if (!buffer.hasRemaining()) { + uploadBlock(); + } + + buffer.put((byte) b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + if (outOfRange(off, b.length) || len < 0 || outOfRange(off + len, b.length)) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { +
return; + } + + int currentOffset = off; + int lenRemaining = len; + while (buffer.remaining() < lenRemaining) { + int firstPart = buffer.remaining(); + buffer.put(b, currentOffset, firstPart); + uploadBlock(); + + currentOffset += firstPart; + lenRemaining -= firstPart; + } + if (lenRemaining > 0) { + buffer.put(b, currentOffset, lenRemaining); + } + } + + private static boolean outOfRange(int off, int len) { + return off < 0 || off > len; + } + + private void uploadBlock() throws IOException { + int size = buffer.position() - buffer.arrayOffset(); + + if (size == 0) { + return; + } + + if (blockUpload == null) { + if (log.isDebugEnabled()) { + log.debug("New block upload for blobPath '{}'", blobPath); + } + + blockUpload = newBlockUpload(); + } + + try (ByteArrayInputStream inputStream = + new ByteArrayInputStream(buffer.array(), buffer.arrayOffset(), size)) { + blockUpload.uploadBlock(inputStream, size); + } catch (BlobStorageException e) { + if (blockUpload != null) { + blockUpload.abort(); + if (log.isDebugEnabled()) { + log.debug("Block upload aborted for blobPath '{}'.", blobPath); + } + } + + throw new IOException( + "Failed to upload block", AzureBlobStorageClient.handleBlobException(e)); + } + + buffer.clear(); + } + + @Override + public void flush() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + if (buffer.position() - buffer.arrayOffset() > 0) { + uploadBlock(); + } + + if (blockUpload != null) { + blockUpload.complete(); + blockUpload = null; + committed = true; + } + } + + @Override + public void close() throws IOException { + if (closed) { + return; + } + + if (blockUpload != null && blockUpload.aborted) { + blockUpload = null; + closed = true; + return; + } + + if (!committed) { + uploadBlock(); + if (blockUpload != null) { + blockUpload.complete(); + blockUpload = null; + committed = true; + } else { + try { + blobClient.upload(new ByteArrayInputStream(new byte[0]), 0, true); + } catch (BlobStorageException 
e) { + throw new IOException( + "Failed to create empty blob", AzureBlobStorageClient.handleBlobException(e)); + } + } + } else { + if (blockUpload != null) { + blockUpload.complete(); + blockUpload = null; + } + } + + closed = true; + } + + private BlockUpload newBlockUpload() throws IOException { + try { + return new BlockUpload(); + } catch (BlobStorageException e) { + throw new IOException( + "Failed to create block upload", AzureBlobStorageClient.handleBlobException(e)); + } + } + + private class BlockUpload { + private final List blockIds; + private boolean aborted = false; + + public BlockUpload() { + this.blockIds = new ArrayList<>(); + if (log.isDebugEnabled()) { + log.debug("Initiated block upload for blobPath '{}'", blobPath); + } + + try { + BlockBlobClient blockBlobClient = blobClient.getBlockBlobClient(); + blockBlobClient.deleteIfExists(); + } catch (BlobStorageException e) { + // ignore; subsequent stage/commit will surface real issues + } + } + + void uploadBlock(ByteArrayInputStream inputStream, long blockSize) { + if (aborted) { + throw new IllegalStateException( + "Can't upload new blocks on a BlockUpload that was aborted"); + } + + String blockId = + Base64.getEncoder() + .encodeToString(UUID.randomUUID().toString().getBytes(StandardCharsets.UTF_8)); + + if (log.isDebugEnabled()) { + log.debug("Uploading block {} for blobPath '{}'", blockId, blobPath); + } + + try { + BlockBlobClient blockBlobClient = blobClient.getBlockBlobClient(); + blockBlobClient.stageBlock(blockId, inputStream, blockSize); + blockIds.add(blockId); + } catch (BlobStorageException e) { + throw new RuntimeException("Failed to upload block", e); + } + } + + void complete() { + if (aborted) { + throw new IllegalStateException("Can't complete a BlockUpload that was aborted"); + } + + if (log.isDebugEnabled()) { + log.debug("Completing block upload for blobPath '{}'", blobPath); + } + + try { + BlockBlobClient blockBlobClient = blobClient.getBlockBlobClient(); + 
blockBlobClient.commitBlockList(blockIds); + } catch (BlobStorageException e) { + throw new RuntimeException("Failed to commit block list", e); + } + } + + public void abort() { + if (log.isWarnEnabled()) { + log.warn("Aborting block upload for blobPath '{}'", blobPath); + } + + aborted = true; + } + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobStorageClient.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobStorageClient.java new file mode 100644 index 000000000000..e91b8d6dcbbb --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/AzureBlobStorageClient.java @@ -0,0 +1,529 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import com.azure.core.credential.TokenCredential; +import com.azure.identity.DefaultAzureCredentialBuilder; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.azure.storage.blob.models.BlobItem; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.models.ListBlobsOptions; +import com.google.common.annotations.VisibleForTesting; +import java.io.ByteArrayInputStream; +import java.io.FilterInputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.invoke.MethodHandles; +import java.util.Collection; +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.solr.common.util.ResumableInputStream; +import org.apache.solr.common.util.StrUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Creates a {@link BlobServiceClient} for communicating with Azure Blob Storage. Utilizes the + * default Azure credential provider chain. 
+ */ +public class AzureBlobStorageClient { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + static final String BLOB_FILE_PATH_DELIMITER = "/"; + private static final int HTTP_NOT_FOUND = 404; + private static final int HTTP_CONFLICT = 409; + private static final int SKIP_BUFFER_SIZE = 8192; + private static final int DELETE_BATCH_SIZE = 1000; + + private static final com.azure.core.http.HttpClient SHARED_HTTP_CLIENT = + new com.azure.core.http.okhttp.OkHttpAsyncHttpClientBuilder().build(); + + private final BlobContainerClient containerClient; + + AzureBlobStorageClient( + String containerName, + String connectionString, + String endpoint, + String accountName, + String accountKey, + String sasToken, + String tenantId, + String clientId, + String clientSecret) { + this( + createInternalClient( + connectionString, + endpoint, + accountName, + accountKey, + sasToken, + tenantId, + clientId, + clientSecret), + containerName); + } + + @VisibleForTesting + AzureBlobStorageClient(BlobServiceClient blobServiceClient, String containerName) { + this.containerClient = blobServiceClient.getBlobContainerClient(containerName); + try { + containerClient.create(); + } catch (BlobStorageException e) { + if (e.getStatusCode() != HTTP_CONFLICT) { + throw e; + } + } + } + + private static BlobServiceClient createInternalClient( + String connectionString, + String endpoint, + String accountName, + String accountKey, + String sasToken, + String tenantId, + String clientId, + String clientSecret) { + + BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); + builder.httpClient(SHARED_HTTP_CLIENT); + + if (StrUtils.isNotNullOrEmpty(connectionString)) { + builder.connectionString(connectionString); + } else if (StrUtils.isNotNullOrEmpty(endpoint)) { + builder.endpoint(endpoint); + if (StrUtils.isNotNullOrEmpty(accountName) && StrUtils.isNotNullOrEmpty(accountKey)) { + builder.credential( + new 
com.azure.storage.common.StorageSharedKeyCredential(accountName, accountKey)); + } else if (StrUtils.isNotNullOrEmpty(sasToken)) { + builder.sasToken(sasToken); + } else { + TokenCredential credential = new DefaultAzureCredentialBuilder().tenantId(tenantId).build(); + builder.credential(credential); + } + } else { + throw new IllegalArgumentException("Either connectionString or endpoint must be provided"); + } + + return builder.buildClient(); + } + + void createDirectory(String path) throws AzureBlobException { + String sanitizedDirPath = sanitizedDirPath(path); + + if (!pathExists(sanitizedDirPath)) { + String parent = getParentDirectory(sanitizedDirPath); + if (!parent.isEmpty() && !parent.equals(BLOB_FILE_PATH_DELIMITER)) { + createDirectory(parent); + } + + try { + BlobClient blobClient = containerClient.getBlobClient(sanitizedDirPath); + blobClient.upload(new ByteArrayInputStream(new byte[0]), 0, true); + java.util.Map metadata = new java.util.HashMap<>(); + metadata.put("hdi_isfolder", "true"); + blobClient.setMetadata(metadata); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + } + + void delete(Collection paths) throws AzureBlobException { + Set entries = new HashSet<>(); + for (String path : paths) { + entries.add(sanitizedFilePath(path)); + } + deleteBlobs(entries); + } + + void deleteDirectory(String path) throws AzureBlobException { + path = sanitizedDirPath(path); + + Set entries = listAll(path); + if (pathExists(path)) { + entries.add(path); + } + + deleteBlobs(entries); + } + + String[] listDir(String path) throws AzureBlobException { + path = sanitizedDirPath(path); + + try { + ListBlobsOptions options = new ListBlobsOptions().setPrefix(path).setMaxResultsPerPage(1000); + + final String finalPath = path; + return containerClient.listBlobs(options, null).stream() + .map(BlobItem::getName) + .filter(s -> s.startsWith(finalPath)) + .map(s -> s.substring(finalPath.length())) + .filter(s -> !s.isEmpty()) + .filter( + s -> { 
+ int slashIndex = s.indexOf(BLOB_FILE_PATH_DELIMITER); + return slashIndex == -1 || slashIndex == s.length() - 1; + }) + .toArray(String[]::new); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + boolean pathExists(String path) throws AzureBlobException { + final String blobPath = sanitizedPath(path); + + if (blobPath.isEmpty() || BLOB_FILE_PATH_DELIMITER.equals(blobPath)) { + return true; + } + + try { + BlobClient blobClient = containerClient.getBlobClient(blobPath); + return blobClient.exists(); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + boolean isDirectory(String path) throws AzureBlobException { + final String dirPrefix = sanitizedDirPath(path); + + try { + ListBlobsOptions options = + new ListBlobsOptions().setPrefix(dirPrefix).setMaxResultsPerPage(1); + if (containerClient.listBlobs(options, null).iterator().hasNext()) { + return true; + } + + BlobClient markerClient = containerClient.getBlobClient(dirPrefix); + if (markerClient.exists()) { + long size = markerClient.getProperties().getBlobSize(); + if (size == 0) { + return true; + } + java.util.Map md = markerClient.getProperties().getMetadata(); + return md != null && md.containsKey("hdi_isfolder"); + } + + return false; + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + long length(String path) throws AzureBlobException { + String blobPath = sanitizedFilePath(path); + try { + BlobClient blobClient = containerClient.getBlobClient(blobPath); + return blobClient.getProperties().getBlobSize(); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + InputStream pullStream(String path) throws AzureBlobException { + final String blobPath = sanitizedFilePath(path); + + try { + BlobClient blobClient = containerClient.getBlobClient(blobPath); + final long contentLength = blobClient.getProperties().getBlobSize(); + + if (contentLength == 0) { + return new ByteArrayInputStream(new byte[0]); + 
} + + InputStream initial = new IdempotentCloseInputStream(blobClient.openInputStream()); + + return new ResumableInputStream( + initial, + bytesRead -> { + if (contentLength > 0 && bytesRead >= contentLength) { + return null; + } + try { + long remaining = + contentLength > 0 ? Math.max(0, contentLength - bytesRead) : Long.MAX_VALUE; + return pullRangeStream(path, bytesRead, remaining); + } catch (AzureBlobException e) { + throw new RuntimeException(e); + } + }); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + InputStream pullRangeStream(String path, long offset, long length) throws AzureBlobException { + final String blobPath = sanitizedFilePath(path); + try { + BlobClient blobClient = containerClient.getBlobClient(blobPath); + com.azure.storage.blob.models.BlobRange range = + new com.azure.storage.blob.models.BlobRange(offset, length); + return new IdempotentCloseInputStream(blobClient.openInputStream(range, null)); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + private static final class IdempotentCloseInputStream extends FilterInputStream { + private boolean closed; + + IdempotentCloseInputStream(InputStream in) { + super(in); + this.closed = false; + } + + @Override + public int read() throws java.io.IOException { + if (closed) { + throw new java.io.IOException("Stream is already closed"); + } + try { + return super.read(); + } catch (RuntimeException re) { + if (isAlreadyClosed(re)) { + throw new java.io.IOException("Stream is already closed", re); + } + throw re; + } + } + + @Override + public int read(byte[] b, int off, int len) throws java.io.IOException { + if (closed) { + throw new java.io.IOException("Stream is already closed"); + } + try { + return super.read(b, off, len); + } catch (RuntimeException re) { + if (isAlreadyClosed(re)) { + throw new java.io.IOException("Stream is already closed", re); + } + throw re; + } + } + + @Override + public void close() throws java.io.IOException { 
+ if (closed) { + return; + } + try { + super.close(); + } catch (java.io.IOException e) { + String msg = e.getMessage(); + if (msg == null || !msg.toLowerCase(java.util.Locale.ROOT).contains("already closed")) { + throw e; + } + // swallow "already closed" to make close idempotent + } finally { + closed = true; + } + } + + @Override + public long skip(long n) throws java.io.IOException { + if (closed) { + throw new java.io.IOException("Stream is already closed"); + } + if (n <= 0) { + return 0L; + } + long remaining = n; + byte[] discard = new byte[SKIP_BUFFER_SIZE]; + try { + while (remaining > 0) { + int toRead = (int) Math.min(discard.length, remaining); + int read = super.read(discard, 0, toRead); + if (read < 0) { + break; + } + remaining -= read; + } + return n - remaining; + } catch (RuntimeException re) { + throw new java.io.IOException(re); + } + } + + private static boolean isAlreadyClosed(Throwable t) { + String msg = t.getMessage(); + return msg != null && msg.toLowerCase(java.util.Locale.ROOT).contains("already closed"); + } + } + + OutputStream pushStream(String path) throws AzureBlobException { + path = sanitizedFilePath(path); + + if (!parentDirectoryExist(path)) { + String parentDirectory = getParentDirectory(path); + if (!parentDirectory.isEmpty() && !parentDirectory.equals(BLOB_FILE_PATH_DELIMITER)) { + createDirectory(parentDirectory); + } + } + + try { + BlobClient blobClient = containerClient.getBlobClient(path); + return new AzureBlobOutputStream(blobClient, path); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + void close() {} + + @VisibleForTesting + void deleteContainerForTests() { + try { + containerClient.delete(); + } catch (BlobStorageException e) { + if (e.getStatusCode() != HTTP_NOT_FOUND) { + throw e; + } + } + } + + private Collection deleteBlobs(Collection paths) throws AzureBlobException { + try { + return deleteBlobs(paths, DELETE_BATCH_SIZE); + } catch (BlobStorageException e) { + throw 
handleBlobException(e); + } + } + + @VisibleForTesting + Collection deleteBlobs(Collection entries, int batchSize) + throws AzureBlobException { + Set deletedPaths = new HashSet<>(); + + for (String path : entries) { + try { + BlobClient blobClient = containerClient.getBlobClient(path); + boolean existed = blobClient.deleteIfExists(); + if (existed) { + deletedPaths.add(path); + } + } catch (BlobStorageException e) { + if (e.getStatusCode() == HTTP_NOT_FOUND) { + continue; + } + + throw new AzureBlobException("Could not delete blob with path: " + path, e); + } + } + + return deletedPaths; + } + + private Set listAll(String path) throws AzureBlobException { + String prefix = sanitizedDirPath(path); + + try { + ListBlobsOptions options = + new ListBlobsOptions().setPrefix(prefix).setMaxResultsPerPage(1000); + + return containerClient.listBlobs(options, null).stream() + .map(BlobItem::getName) + .filter(s -> s.startsWith(prefix)) + .collect(Collectors.toSet()); + } catch (BlobStorageException e) { + throw handleBlobException(e); + } + } + + private boolean parentDirectoryExist(String path) throws AzureBlobException { + String parentDirectory = getParentDirectory(path); + + if (parentDirectory.isEmpty() || parentDirectory.equals(BLOB_FILE_PATH_DELIMITER)) { + return true; + } + + return pathExists(parentDirectory); + } + + private String getParentDirectory(String path) { + if (!path.contains(BLOB_FILE_PATH_DELIMITER)) { + return ""; + } + + int fromEnd = path.length() - 1; + if (path.endsWith(BLOB_FILE_PATH_DELIMITER)) { + fromEnd -= 1; + } + return fromEnd > 0 + ? 
path.substring(0, path.lastIndexOf(BLOB_FILE_PATH_DELIMITER, fromEnd) + 1) + : ""; + } + + String sanitizedPath(String path) throws AzureBlobException { + String sanitizedPath = path.trim(); + while (sanitizedPath.startsWith(BLOB_FILE_PATH_DELIMITER)) { + sanitizedPath = sanitizedPath.substring(1).trim(); + } + + return sanitizedPath; + } + + String sanitizedFilePath(String path) throws AzureBlobException { + String sanitizedPath = sanitizedPath(path); + + if (sanitizedPath.endsWith(BLOB_FILE_PATH_DELIMITER)) { + throw new AzureBlobException("Invalid Path. Path for file can't end with '/'"); + } + + if (sanitizedPath.isEmpty()) { + throw new AzureBlobException("Invalid Path. Path cannot be empty"); + } + + return sanitizedPath; + } + + String sanitizedDirPath(String path) throws AzureBlobException { + String sanitizedPath = sanitizedPath(path); + + if (!sanitizedPath.endsWith(BLOB_FILE_PATH_DELIMITER)) { + sanitizedPath += BLOB_FILE_PATH_DELIMITER; + } + + return sanitizedPath; + } + + static AzureBlobException handleBlobException(BlobStorageException e) { + String errMessage = + String.format( + Locale.ROOT, + "Azure Blob Storage error: [statusCode=%s] [errorCode=%s] [message=%s]", + e.getStatusCode(), + e.getErrorCode(), + e.getMessage()); + + log.error(errMessage); + + if (e.getStatusCode() == HTTP_NOT_FOUND) { + return new AzureBlobNotFoundException(errMessage, e); + } else { + return new AzureBlobException(errMessage, e); + } + } +} diff --git a/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/package-info.java b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/package-info.java new file mode 100644 index 000000000000..c76136b3e788 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/java/org/apache/solr/azureblob/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Solr Azure Blob Storage backup repository */ +package org.apache.solr.azureblob; diff --git a/solr/modules/azure-blob-repository/src/test-files/conf/schema.xml b/solr/modules/azure-blob-repository/src/test-files/conf/schema.xml new file mode 100644 index 000000000000..a3a7cc465c27 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test-files/conf/schema.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + id + diff --git a/solr/modules/azure-blob-repository/src/test-files/conf/solrconfig.xml b/solr/modules/azure-blob-repository/src/test-files/conf/solrconfig.xml new file mode 100644 index 000000000000..853ba6562416 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test-files/conf/solrconfig.xml @@ -0,0 +1,51 @@ + + + + + + + + + ${solr.data.dir:} + + + + + ${tests.luceneMatchVersion:LATEST} + + + + ${solr.commitwithin.softcommit:true} + + + + + + + explicit + true + text + + + + + +: + + diff --git a/solr/modules/azure-blob-repository/src/test-files/log4j2.xml b/solr/modules/azure-blob-repository/src/test-files/log4j2.xml new file mode 100644 index 000000000000..528299e3e0bd --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test-files/log4j2.xml @@ -0,0 +1,40 @@ + + + + + + + + + %maxLen{%-4r %-5p (%t) [%notEmpty{n:%X{node_name}}%notEmpty{ 
c:%X{collection}}%notEmpty{ s:%X{shard}}%notEmpty{ r:%X{replica}}%notEmpty{ x:%X{core}}%notEmpty{ t:%X{trace_id}}] %c{1.} %m%notEmpty{ + =>%ex{short}}}{10240}%n + + + + + + + + + + + + + + + diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AbstractAzureBlobClientTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AbstractAzureBlobClientTest.java new file mode 100644 index 000000000000..f6ae8f547d7c --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AbstractAzureBlobClientTest.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import com.azure.core.http.HttpClient; +import com.azure.core.http.okhttp.OkHttpAsyncHttpClientBuilder; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.carrotsearch.randomizedtesting.ThreadFilter; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.TimeUnit; +import okhttp3.OkHttpClient; +import org.apache.lucene.tests.util.QuickPatchThreadsFilter; +import org.apache.solr.SolrIgnoredThreadsFilter; +import org.apache.solr.SolrTestCase; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assume; +import org.junit.Before; +import org.junit.BeforeClass; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.utility.DockerImageName; + +/** Abstract class for tests with Azure Blob Storage emulator. 
*/ +@ThreadLeakFilters( + defaultFilters = true, + filters = { + SolrIgnoredThreadsFilter.class, + QuickPatchThreadsFilter.class, + AbstractAzureBlobClientTest.OkHttpThreadLeakFilterTest.class, + }) +public class AbstractAzureBlobClientTest extends SolrTestCase { + + private static final String AZURITE_IMAGE = "mcr.microsoft.com/azure-storage/azurite:3.33.0"; + private static final int BLOB_SERVICE_PORT = 10000; + + private static GenericContainer azuriteContainer; + private static OkHttpClient sharedOkHttpClient; + private static String connectionString; + + protected String containerName; + protected org.apache.solr.util.SocketProxy proxy; + + protected AzureBlobStorageClient client; + + @SuppressWarnings("resource") + @BeforeClass + public static void setUpClass() { + try { + azuriteContainer = + new GenericContainer<>(DockerImageName.parse(AZURITE_IMAGE)) + .withExposedPorts(BLOB_SERVICE_PORT); + azuriteContainer.start(); + sharedOkHttpClient = new OkHttpClient.Builder().build(); + } catch (Throwable t) { + Assume.assumeNoException("Docker/Testcontainers not available; skipping Azure tests", t); + } + } + + @Before + public void setUpClient() throws Exception { + setAzureTestCredentials(); + + String blobServiceUrl = getBlobServiceUrl(); + connectionString = + "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=" + + blobServiceUrl + + "/devstoreaccount1;"; + + proxy = new org.apache.solr.util.SocketProxy(); + proxy.open(new java.net.URI(blobServiceUrl)); + + HttpClient httpClient = new OkHttpAsyncHttpClientBuilder(sharedOkHttpClient).build(); + + String proxiedConn = + connectionString.replace( + ":" + azuriteContainer.getMappedPort(BLOB_SERVICE_PORT), ":" + proxy.getListenPort()); + + BlobServiceClient blobServiceClient = + new BlobServiceClientBuilder() + .connectionString(proxiedConn) + .httpClient(httpClient) + .buildClient(); + + 
containerName = "test-" + java.util.UUID.randomUUID(); + client = new AzureBlobStorageClient(blobServiceClient, containerName); + } + + public static void setAzureTestCredentials() { + System.setProperty("AZURE_CLIENT_ID", "test-client-id"); + System.setProperty("AZURE_TENANT_ID", "test-tenant-id"); + System.setProperty("AZURE_CLIENT_SECRET", "test-client-secret"); + } + + @After + public void tearDownClient() { + if (client != null) { + try { + client.deleteContainerForTests(); + } catch (Throwable ignored) { + } + client.close(); + } + if (proxy != null) { + proxy.close(); + proxy = null; + } + } + + /** Simulate a connection loss on the proxy. */ + void initiateBlobConnectionLoss() { + if (proxy != null) { + proxy.halfClose(); + } + } + + @AfterClass + public static void afterAll() { + if (azuriteContainer != null) { + try { + azuriteContainer.stop(); + azuriteContainer.close(); + } catch (Throwable ignored) { + } + azuriteContainer = null; + } + + if (sharedOkHttpClient != null) { + sharedOkHttpClient.dispatcher().executorService().shutdown(); + sharedOkHttpClient.dispatcher().cancelAll(); + sharedOkHttpClient.connectionPool().evictAll(); + try { + if (sharedOkHttpClient.cache() != null) { + sharedOkHttpClient.cache().close(); + } + } catch (Throwable ignored) { + } + try { + sharedOkHttpClient.dispatcher().executorService().awaitTermination(2, TimeUnit.SECONDS); + } catch (Throwable ignored) { + } + sharedOkHttpClient = null; + } + + try { + reactor.core.scheduler.Schedulers.shutdownNow(); + Thread.sleep(100); + } catch (Throwable ignored) { + } + } + + void pushContent(String path, String content) throws AzureBlobException { + pushContent(path, content.getBytes(StandardCharsets.UTF_8)); + } + + void pushContent(String path, byte[] content) throws AzureBlobException { + try (OutputStream output = client.pushStream(path)) { + output.write(content); + } catch (IOException e) { + throw new AzureBlobException("Failed to write content", e); + } + } + + static 
String getConnectionString() { + return connectionString; + } + + String getBlobServiceUrl() { + return "http://" + + azuriteContainer.getHost() + + ":" + + azuriteContainer.getMappedPort(BLOB_SERVICE_PORT); + } + + public static class OkHttpThreadLeakFilterTest implements ThreadFilter { + + @Override + public boolean reject(Thread t) { + String name = t.getName(); + if (name == null) { + return false; + } + return name.contains("OkHttp") || name.contains("Okio Watchdog"); + } + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobBackupRepositoryTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobBackupRepositoryTest.java new file mode 100644 index 000000000000..cc2432eb51c8 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobBackupRepositoryTest.java @@ -0,0 +1,317 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import static org.apache.solr.azureblob.AzureBlobBackupRepository.BLOB_SCHEME; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import org.apache.commons.io.file.PathUtils; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.backup.repository.BackupRepository; +import org.junit.Before; +import org.junit.Test; + +public class AzureBlobBackupRepositoryTest extends AbstractAzureBlobClientTest { + + private AzureBlobBackupRepository repository; + + protected static final String CONTAINER_NAME = "test-container"; + + protected Class getRepositoryClass() { + return AzureBlobBackupRepository.class; + } + + protected BackupRepository getRepository() { + return repository; + } + + protected URI getBaseUri() { + return URI.create(BLOB_SCHEME + ":/"); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + + NamedList config = new NamedList<>(); + config.add("azure.blob.container.name", CONTAINER_NAME); + config.add("azure.blob.connection.string", getConnectionString()); + + repository = + new AzureBlobBackupRepository() { + @Override + public void init(NamedList args) { + this.config = args; + setClient(AzureBlobBackupRepositoryTest.this.client); + } + }; + + repository.init(config); + } + + @Test + public void testCreateDirectory() throws IOException { + URI dirUri = getBaseUri().resolve("test-dir/"); + repository.createDirectory(dirUri); + assertTrue("Directory should exist", repository.exists(dirUri)); + assertEquals( + "Should be a directory", + BackupRepository.PathType.DIRECTORY, + repository.getPathType(dirUri)); + } + + @Test + public void testCreateFile() throws IOException { + URI fileUri = 
getBaseUri().resolve("test-file.txt"); + String content = "Hello, Azure Blob Storage!"; + + try (OutputStream output = repository.createOutput(fileUri)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + } + + assertTrue("File should exist", repository.exists(fileUri)); + assertEquals( + "Should be a file", BackupRepository.PathType.FILE, repository.getPathType(fileUri)); + } + + @Test + public void testReadWriteFile() throws IOException { + URI fileUri = getBaseUri().resolve("read-write-test.txt"); + String originalContent = "Test content for read/write operations"; + + try (OutputStream output = repository.createOutput(fileUri)) { + output.write(originalContent.getBytes(StandardCharsets.UTF_8)); + } + + try (IndexInput input = + repository.openInput(getBaseUri(), "read-write-test.txt", IOContext.DEFAULT)) { + byte[] buffer = new byte[1024]; + input.readBytes(buffer, 0, (int) input.length()); + String readContent = new String(buffer, 0, (int) input.length(), StandardCharsets.UTF_8); + assertEquals("Content should match", originalContent, readContent); + } + } + + @Test + public void testDeleteFile() throws IOException { + URI fileUri = getBaseUri().resolve("delete-test.txt"); + String content = "File to be deleted"; + + try (OutputStream output = repository.createOutput(fileUri)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + } + + assertTrue("File should exist before deletion", repository.exists(fileUri)); + + repository.delete(fileUri, java.util.Arrays.asList("delete-test.txt")); + + assertFalse("File should not exist after deletion", repository.exists(fileUri)); + } + + @Test + public void testDeleteDirectory() throws IOException { + URI dirUri = getBaseUri().resolve("delete-dir/"); + URI fileUri = dirUri.resolve("nested-file.txt"); + + repository.createDirectory(dirUri); + try (OutputStream output = repository.createOutput(fileUri)) { + output.write("Nested file content".getBytes(StandardCharsets.UTF_8)); + } + + assertTrue("Directory 
should exist", repository.exists(dirUri)); + assertTrue("File should exist", repository.exists(fileUri)); + + repository.deleteDirectory(dirUri); + + assertFalse("Directory should not exist after deletion", repository.exists(dirUri)); + assertFalse("File should not exist after deletion", repository.exists(fileUri)); + } + + @Test + public void testListDirectory() throws IOException { + URI dirUri = getBaseUri().resolve("list-test/"); + repository.createDirectory(dirUri); + + String[] fileNames = {"file1.txt", "file2.txt", "subdir/"}; + for (String fileName : fileNames) { + URI fileUri = dirUri.resolve(fileName); + if (fileName.endsWith("/")) { + repository.createDirectory(fileUri); + } else { + try (OutputStream output = repository.createOutput(fileUri)) { + output.write(("Content of " + fileName).getBytes(StandardCharsets.UTF_8)); + } + } + } + + String[] listedFiles = repository.listAll(dirUri); + assertEquals("Should list all files and directories", fileNames.length, listedFiles.length); + + for (String fileName : fileNames) { + boolean found = false; + for (String listedFile : listedFiles) { + if (fileName.equals(listedFile)) { + found = true; + break; + } + } + assertTrue("Should find file: " + fileName, found); + } + } + + @Test + public void testCopyFileFromDirectory() throws IOException { + Path tempDir = Files.createTempDirectory("blob-test"); + Path tempFile = tempDir.resolve("source-file.txt"); + String content = "Source file content"; + Files.write(tempFile, content.getBytes(StandardCharsets.UTF_8)); + + try { + Directory sourceDir = new org.apache.lucene.store.MMapDirectory(tempDir); + URI destUri = getBaseUri().resolve("copied-file.txt"); + + repository.copyFileFrom(sourceDir, "source-file.txt", destUri); + + assertTrue("Copied file should exist", repository.exists(destUri)); + + // Verify content + try (IndexInput input = + repository.openInput(getBaseUri(), "copied-file.txt", IOContext.DEFAULT)) { + byte[] buffer = new byte[1024]; + 
input.readBytes(buffer, 0, (int) input.length()); + String readContent = new String(buffer, 0, (int) input.length(), StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + + sourceDir.close(); + } finally { + PathUtils.deleteDirectory(tempDir); + } + } + + @Test + public void testCopyFileToDirectory() throws IOException { + URI sourceUri = getBaseUri().resolve("source-file.txt"); + String content = "Source file content"; + + try (OutputStream output = repository.createOutput(sourceUri)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + } + + Path tempDir = Files.createTempDirectory("blob-test"); + + try { + Directory destDir = new org.apache.lucene.store.MMapDirectory(tempDir); + + repository.copyFileTo(sourceUri, "source-file.txt", destDir); + + Path destFile = tempDir.resolve("source-file.txt"); + assertTrue("Destination file should exist", Files.exists(destFile)); + + String readContent = Files.readString(destFile, StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + + destDir.close(); + } finally { + PathUtils.deleteDirectory(tempDir); + } + } + + @Test + public void testIndexInputOutput() throws IOException { + URI fileUri = getBaseUri().resolve("index-test.txt"); + String content = "Test content for index input/output"; + + try (OutputStream output = repository.createOutput(fileUri)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + } + + try (IndexInput input = + repository.openInput(getBaseUri(), "index-test.txt", IOContext.DEFAULT)) { + byte[] buffer = new byte[(int) input.length()]; + input.readBytes(buffer, 0, buffer.length); + String readContent = new String(buffer, StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + @Test + public void testChecksumVerification() throws IOException { + URI fileUri = getBaseUri().resolve("checksum-test.txt"); + String content = "Test content for checksum verification"; + + try 
(OutputStream output = repository.createOutput(fileUri)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + output.write("FOOTER".getBytes(StandardCharsets.UTF_8)); + } + + try (IndexInput input = + repository.openInput(getBaseUri(), "checksum-test.txt", IOContext.DEFAULT)) { + byte[] buffer = new byte[1024]; + input.readBytes(buffer, 0, (int) input.length()); + String readContent = new String(buffer, 0, (int) input.length(), StandardCharsets.UTF_8); + assertTrue("Content should contain original text", readContent.contains(content)); + } + } + + protected NamedList getBaseBackupRepositoryConfiguration() { + NamedList config = new NamedList<>(); + config.add("azure.blob.container.name", CONTAINER_NAME); + config.add("azure.blob.connection.string", getConnectionString()); + return config; + } + + @Test + public void testCanReadProvidedConfigValues() throws Exception { + final NamedList config = getBaseBackupRepositoryConfiguration(); + config.add("configKey1", "configVal1"); + config.add("configKey2", "configVal2"); + config.add("location", "foo"); + try (BackupRepository repo = getRepository()) { + repo.init(config); + assertEquals("configVal1", repo.getConfigProperty("configKey1")); + assertEquals("configVal2", repo.getConfigProperty("configKey2")); + } + } + + @Test + public void testCanChooseDefaultOrOverrideLocationValue() throws Exception { + final NamedList config = getBaseBackupRepositoryConfiguration(); + config.add("location", "foo"); + try (BackupRepository repo = getRepository()) { + repo.init(config); + assertEquals("foo", repo.getConfigProperty("location")); + } + } + + @Override + public void tearDown() throws Exception { + if (repository != null) { + repository.close(); + } + super.tearDown(); + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIncrementalBackupTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIncrementalBackupTest.java new file mode 
100644 index 000000000000..417c80dc139c --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIncrementalBackupTest.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import java.nio.charset.StandardCharsets; +import org.junit.Test; + +public class AzureBlobIncrementalBackupTest extends AbstractAzureBlobClientTest { + + @Test + public void testIncrementalBackup() throws Exception { + String backupPath = "incremental-backup-test/"; + + createBackup(backupPath + "backup1/", "Initial backup content"); + createBackup(backupPath + "backup2/", "Incremental backup content"); + + assertTrue("Initial backup should exist", client.pathExists(backupPath + "backup1/")); + assertTrue("Incremental backup should exist", client.pathExists(backupPath + "backup2/")); + } + + @Test + public void testBackupWithMultipleFiles() throws Exception { + String backupPath = "multi-file-backup-test/"; + String[] files = {"file1.txt", "file2.txt", "file3.txt"}; + String[] contents = {"Content 1", "Content 2", "Content 3"}; + + for (int i = 0; i < files.length; i++) { + pushContent(backupPath + files[i], contents[i]); + } + + for (String file : 
files) { + assertTrue("File should exist: " + file, client.pathExists(backupPath + file)); + } + } + + @Test + public void testBackupWithNestedDirectories() throws Exception { + String backupPath = "nested-backup-test/"; + String[] dirs = { + backupPath + "level1/", backupPath + "level1/level2/", backupPath + "level1/level2/level3/" + }; + + for (String dir : dirs) { + client.createDirectory(dir); + } + + pushContent(backupPath + "root-file.txt", "Root file content"); + pushContent(backupPath + "level1/mid-file.txt", "Mid file content"); + pushContent(backupPath + "level1/level2/level3/deep-file.txt", "Deep file content"); + + assertTrue("Root file should exist", client.pathExists(backupPath + "root-file.txt")); + assertTrue("Mid file should exist", client.pathExists(backupPath + "level1/mid-file.txt")); + assertTrue( + "Deep file should exist", + client.pathExists(backupPath + "level1/level2/level3/deep-file.txt")); + } + + @Test + public void testBackupRestore() throws Exception { + String backupPath = "backup-restore-test/"; + String restorePath = "restore-test/"; + String originalContent = "Original backup content"; + + pushContent(backupPath + "backup-file.txt", originalContent); + + try (var input = client.pullStream(backupPath + "backup-file.txt"); + var output = client.pushStream(restorePath + "restored-file.txt")) { + byte[] buffer = new byte[1024]; + int bytesRead; + while ((bytesRead = input.read(buffer)) != -1) { + output.write(buffer, 0, bytesRead); + } + } + + assertTrue("Restored file should exist", client.pathExists(restorePath + "restored-file.txt")); + + try (var input = client.pullStream(restorePath + "restored-file.txt")) { + byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String restoredContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Restored content should match", originalContent, restoredContent); + } + } + + @Test + public void testBackupWithLargeFiles() throws Exception { + String 
backupPath = "large-file-backup-test/"; + StringBuilder contentBuilder = new StringBuilder(); + for (int i = 0; i < 10000; i++) { + contentBuilder.append("This is line ").append(i).append(" of the large backup file.\n"); + } + String largeContent = contentBuilder.toString(); + + pushContent(backupPath + "large-backup.txt", largeContent); + + assertTrue( + "Large backup file should exist", client.pathExists(backupPath + "large-backup.txt")); + assertEquals( + "Large file length should match", + largeContent.length(), + client.length(backupPath + "large-backup.txt")); + } + + @Test + public void testBackupWithBinaryFiles() throws Exception { + String backupPath = "binary-backup-test/"; + byte[] binaryData = new byte[1024]; + for (int i = 0; i < binaryData.length; i++) { + binaryData[i] = (byte) (i % 256); + } + + pushContent(backupPath + "binary-backup.bin", binaryData); + + assertTrue( + "Binary backup file should exist", client.pathExists(backupPath + "binary-backup.bin")); + assertEquals( + "Binary file length should match", + binaryData.length, + client.length(backupPath + "binary-backup.bin")); + } + + @Test + public void testBackupCleanup() throws Exception { + String backupPath = "backup-cleanup-test/"; + + for (int i = 1; i <= 5; i++) { + pushContent(backupPath + "backup" + i + "/backup-file.txt", "Backup " + i + " content"); + } + + for (int i = 1; i <= 5; i++) { + assertTrue( + "Backup " + i + " should exist", client.pathExists(backupPath + "backup" + i + "/")); + } + + for (int i = 1; i <= 2; i++) { + client.deleteDirectory(backupPath + "backup" + i + "/"); + } + + for (int i = 1; i <= 2; i++) { + assertFalse( + "Old backup " + i + " should not exist", + client.pathExists(backupPath + "backup" + i + "/")); + } + for (int i = 3; i <= 5; i++) { + assertTrue( + "Recent backup " + i + " should exist", + client.pathExists(backupPath + "backup" + i + "/")); + } + } + + @Test + public void testBackupWithMetadata() throws Exception { + String backupPath = 
"metadata-backup-test/"; + + pushContent( + backupPath + "backup-metadata.json", + "{\"timestamp\":\"2023-01-01T00:00:00Z\",\"version\":\"1.0\"}"); + pushContent(backupPath + "backup-data.txt", "Backup data content"); + + assertTrue( + "Metadata file should exist", client.pathExists(backupPath + "backup-metadata.json")); + assertTrue("Data file should exist", client.pathExists(backupPath + "backup-data.txt")); + } + + @Test + public void testConcurrentBackups() throws Exception { + String backupPath = "concurrent-backup-test/"; + String[] backupNames = {"backup1", "backup2", "backup3"}; + String[] contents = {"Content 1", "Content 2", "Content 3"}; + + for (int i = 0; i < backupNames.length; i++) { + pushContent(backupPath + backupNames[i] + "/backup-file.txt", contents[i]); + } + + for (String backupName : backupNames) { + assertTrue( + "Backup should exist: " + backupName, client.pathExists(backupPath + backupName + "/")); + } + } + + private void createBackup(String backupPath, String content) throws AzureBlobException { + client.createDirectory(backupPath); + pushContent(backupPath + "backup-file.txt", content); + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIndexInputTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIndexInputTest.java new file mode 100644 index 000000000000..b91274fceea3 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobIndexInputTest.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import org.junit.Test; + +public class AzureBlobIndexInputTest extends AbstractAzureBlobClientTest { + + @Test + public void testBasicIndexInput() throws Exception { + String path = "index-input-test.txt"; + String content = "Index input test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + byte[] buffer = new byte[1024]; + input.readBytes(buffer, 0, content.length()); + String readContent = new String(buffer, 0, content.length(), StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + @Test + public void testIndexInputSeek() throws Exception { + String path = "index-input-seek-test.txt"; + String content = "Index input seek test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + long seekPosition = content.length() / 2; + input.seek(seekPosition); + + byte[] buffer = new byte[1024]; + String expectedContent = content.substring((int) seekPosition); + input.readBytes(buffer, 0, expectedContent.length()); + String readContent = new String(buffer, 0, expectedContent.length(), StandardCharsets.UTF_8); + assertEquals("Content from seek position should match", expectedContent, readContent); + } + } + + @Test + public void testIndexInputLength() throws Exception { + String path = "index-input-length-test.txt"; + String 
content = "Length test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + assertEquals("Length should match", content.length(), input.length()); + } + } + + @Test + public void testIndexInputReadByte() throws Exception { + String path = "index-input-byte-test.txt"; + String content = "Byte read test"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + StringBuilder readContent = new StringBuilder(); + for (int i = 0; i < content.length(); i++) { + byte b = input.readByte(); + readContent.append((char) b); + } + + assertEquals("Byte by byte content should match", content, readContent.toString()); + } + } + + @Test + public void testIndexInputReadBytes() throws Exception { + String path = "index-input-bytes-test.txt"; + String content = "Bytes read test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + byte[] buffer = new byte[10]; + StringBuilder readContent = new StringBuilder(); + + long remaining = input.length(); + while (remaining > 0) { + int toRead = (int) Math.min(buffer.length, remaining); + input.readBytes(buffer, 0, toRead); + readContent.append(new String(buffer, 0, toRead, StandardCharsets.UTF_8)); + remaining -= toRead; + } + + assertEquals("Bytes content should match", content, readContent.toString()); + } + } + + @Test + public void testIndexInputSeekToEnd() throws Exception { + String path = "index-input-seek-end-test.txt"; + String content = "Seek to end test"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + input.seek(content.length()); + expectThrows(IOException.class, input::readByte); + } + } + + @Test + public void testIndexInputSeekBeyondEnd() throws Exception { + String path = 
"index-input-seek-beyond-test.txt"; + String content = "Seek beyond end test"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + long invalidPosition = content.length() + 1L; + expectThrows(IOException.class, () -> input.seek(invalidPosition)); + } + } + + @Test + public void testIndexInputGetFilePointer() throws Exception { + String path = "index-input-pointer-test.txt"; + String content = "File pointer test content"; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + assertEquals("Initial position should be 0", 0, input.getFilePointer()); + + byte[] buffer = new byte[5]; + input.readBytes(buffer, 0, buffer.length); + assertEquals("Position should be 5 after reading 5 bytes", 5, input.getFilePointer()); + + input.seek(10); + assertEquals("Position should be 10 after seek", 10, input.getFilePointer()); + } + } + + @Test + public void testIndexInputLargeFile() throws Exception { + String path = "index-input-large-test.txt"; + StringBuilder contentBuilder = new StringBuilder(); + + for (int i = 0; i < 10000; i++) { + contentBuilder.append("This is line ").append(i).append(" of the large file.\n"); + } + String content = contentBuilder.toString(); + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + assertEquals("Length should match", content.length(), input.length()); + + byte[] buffer = new byte[8192]; + StringBuilder readContent = new StringBuilder(); + long remaining = input.length(); + while (remaining > 0) { + int toRead = (int) Math.min(buffer.length, remaining); + input.readBytes(buffer, 0, toRead); + readContent.append(new String(buffer, 0, toRead, StandardCharsets.UTF_8)); + remaining -= toRead; + } + + assertEquals("Large content should match", content, readContent.toString()); + } + } + + @Test + public void 
testIndexInputEmptyFile() throws Exception { + String path = "index-input-empty-test.txt"; + String content = ""; + + pushContent(path, content); + + try (AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path))) { + assertEquals("Length should be 0", 0, input.length()); + assertEquals("Position should be 0", 0, input.getFilePointer()); + expectThrows(IOException.class, input::readByte); + } + } + + @Test + public void testIndexInputClose() throws Exception { + String path = "index-input-close-test.txt"; + String content = "Close test content"; + + pushContent(path, content); + + AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path)); + input.close(); + + expectThrows(IOException.class, input::readByte); + expectThrows(IOException.class, () -> input.seek(0)); + } + + @Test + public void testIndexInputMultipleClose() throws Exception { + String path = "index-input-multiple-close-test.txt"; + String content = "Multiple close test content"; + + pushContent(path, content); + + AzureBlobIndexInput input = new AzureBlobIndexInput(path, client, client.length(path)); + input.close(); + input.close(); + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobInstallShardTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobInstallShardTest.java new file mode 100644 index 000000000000..6ad689a81a39 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobInstallShardTest.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import java.nio.charset.StandardCharsets; +import org.junit.Test; + +public class AzureBlobInstallShardTest extends AbstractAzureBlobClientTest { + + @Test + public void testInstallShard() throws Exception { + String shardPath = "install-shard-test/"; + + client.createDirectory(shardPath); + client.createDirectory(shardPath + "index/"); + client.createDirectory(shardPath + "conf/"); + + pushContent(shardPath + "index/segments_1", "Shard index segments"); + pushContent(shardPath + "index/_0.cfs", "Shard index file"); + pushContent(shardPath + "conf/solrconfig.xml", "Shard configuration"); + pushContent(shardPath + "conf/schema.xml", "Shard schema"); + + assertTrue("Shard directory should exist", client.pathExists(shardPath)); + assertTrue("Index directory should exist", client.pathExists(shardPath + "index/")); + assertTrue("Conf directory should exist", client.pathExists(shardPath + "conf/")); + assertTrue("Segments file should exist", client.pathExists(shardPath + "index/segments_1")); + assertTrue("Index file should exist", client.pathExists(shardPath + "index/_0.cfs")); + assertTrue("Config file should exist", client.pathExists(shardPath + "conf/solrconfig.xml")); + assertTrue("Schema file should exist", client.pathExists(shardPath + "conf/schema.xml")); + } + + @Test + public void testInstallShardWithMultipleIndexFiles() throws Exception { + String shardPath = "multi-index-shard-test/"; + String[] indexFiles = {"segments_1", "_0.cfs", "_0.cfe", "_0.si", "_1.cfs", "_1.cfe", "_1.si"}; + + 
client.createDirectory(shardPath); + client.createDirectory(shardPath + "index/"); + + for (String indexFile : indexFiles) { + pushContent(shardPath + "index/" + indexFile, "Index file content: " + indexFile); + } + + for (String indexFile : indexFiles) { + assertTrue( + "Index file should exist: " + indexFile, + client.pathExists(shardPath + "index/" + indexFile)); + } + } + + @Test + public void testInstallShardWithDataFiles() throws Exception { + String shardPath = "data-shard-test/"; + String[] dataFiles = { + "tlog.0000000000000000001", "tlog.0000000000000000002", "tlog.0000000000000000003" + }; + + client.createDirectory(shardPath); + client.createDirectory(shardPath + "data/"); + + for (String dataFile : dataFiles) { + pushContent(shardPath + "data/" + dataFile, "Transaction log: " + dataFile); + } + + for (String dataFile : dataFiles) { + assertTrue( + "Data file should exist: " + dataFile, client.pathExists(shardPath + "data/" + dataFile)); + } + } + + @Test + public void testInstallShardWithConfiguration() throws Exception { + String shardPath = "config-shard-test/"; + String solrConfig = + "\n" + + "\n" + + " LATEST\n" + + " \n" + + ""; + + String schema = + "\n" + + "\n" + + " \n" + + ""; + + client.createDirectory(shardPath); + client.createDirectory(shardPath + "conf/"); + + pushContent(shardPath + "conf/solrconfig.xml", solrConfig); + pushContent(shardPath + "conf/schema.xml", schema); + + assertTrue("Solr config should exist", client.pathExists(shardPath + "conf/solrconfig.xml")); + assertTrue("Schema should exist", client.pathExists(shardPath + "conf/schema.xml")); + + try (var input = client.pullStream(shardPath + "conf/solrconfig.xml")) { + byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertTrue( + "Solr config should contain expected content", + readContent.contains("luceneMatchVersion")); + } + } + + @Test + public void 
testInstallShardWithLargeIndex() throws Exception { + String shardPath = "large-index-shard-test/"; + StringBuilder largeContent = new StringBuilder(); + for (int i = 0; i < 50000; i++) { + largeContent.append("Index data line ").append(i).append("\n"); + } + + client.createDirectory(shardPath); + client.createDirectory(shardPath + "index/"); + + pushContent(shardPath + "index/large-index.cfs", largeContent.toString()); + + assertTrue( + "Large index file should exist", client.pathExists(shardPath + "index/large-index.cfs")); + assertEquals( + "Large index file length should match", + largeContent.length(), + client.length(shardPath + "index/large-index.cfs")); + } + + @Test + public void testInstallShardWithBinaryIndex() throws Exception { + String shardPath = "binary-index-shard-test/"; + byte[] binaryData = new byte[2048]; + for (int i = 0; i < binaryData.length; i++) { + binaryData[i] = (byte) (i % 256); + } + + client.createDirectory(shardPath); + client.createDirectory(shardPath + "index/"); + + pushContent(shardPath + "index/binary-index.cfs", binaryData); + + assertTrue( + "Binary index file should exist", client.pathExists(shardPath + "index/binary-index.cfs")); + assertEquals( + "Binary index file length should match", + binaryData.length, + client.length(shardPath + "index/binary-index.cfs")); + } + + @Test + public void testInstallShardWithNestedStructure() throws Exception { + String shardPath = "nested-shard-test/"; + + client.createDirectory(shardPath); + client.createDirectory(shardPath + "index/"); + client.createDirectory(shardPath + "conf/"); + client.createDirectory(shardPath + "data/"); + client.createDirectory(shardPath + "logs/"); + + pushContent(shardPath + "index/segments_1", "Segments file"); + pushContent(shardPath + "conf/solrconfig.xml", "Config file"); + pushContent(shardPath + "data/tlog.1", "Transaction log"); + pushContent(shardPath + "logs/solr.log", "Log file"); + + assertTrue("Root shard should exist", 
client.pathExists(shardPath)); + assertTrue("Index directory should exist", client.pathExists(shardPath + "index/")); + assertTrue("Conf directory should exist", client.pathExists(shardPath + "conf/")); + assertTrue("Data directory should exist", client.pathExists(shardPath + "data/")); + assertTrue("Logs directory should exist", client.pathExists(shardPath + "logs/")); + assertTrue("Segments file should exist", client.pathExists(shardPath + "index/segments_1")); + assertTrue("Config file should exist", client.pathExists(shardPath + "conf/solrconfig.xml")); + assertTrue("Transaction log should exist", client.pathExists(shardPath + "data/tlog.1")); + assertTrue("Log file should exist", client.pathExists(shardPath + "logs/solr.log")); + } + + @Test + public void testInstallShardWithMetadata() throws Exception { + String shardPath = "metadata-shard-test/"; + String metadata = + "{\n" + + " \"shardId\": \"shard1\",\n" + + " \"coreName\": \"test-core\",\n" + + " \"version\": \"1.0\",\n" + + " \"timestamp\": \"2023-01-01T00:00:00Z\"\n" + + "}"; + + client.createDirectory(shardPath); + + pushContent(shardPath + "shard-metadata.json", metadata); + pushContent(shardPath + "index/segments_1", "Index segments"); + + assertTrue("Metadata file should exist", client.pathExists(shardPath + "shard-metadata.json")); + assertTrue("Index file should exist", client.pathExists(shardPath + "index/segments_1")); + + try (var input = client.pullStream(shardPath + "shard-metadata.json")) { + byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertTrue("Metadata should contain shard ID", readContent.contains("shard1")); + assertTrue("Metadata should contain core name", readContent.contains("test-core")); + } + } + + @Test + public void testInstallShardCleanup() throws Exception { + String shardPath = "cleanup-shard-test/"; + + client.createDirectory(shardPath); + 
client.createDirectory(shardPath + "index/"); + client.createDirectory(shardPath + "conf/"); + + pushContent(shardPath + "index/segments_1", "Index segments"); + pushContent(shardPath + "conf/solrconfig.xml", "Config file"); + + assertTrue("Shard should exist", client.pathExists(shardPath)); + + client.deleteDirectory(shardPath); + + assertFalse("Shard should not exist after cleanup", client.pathExists(shardPath)); + assertFalse( + "Index directory should not exist after cleanup", client.pathExists(shardPath + "index/")); + assertFalse( + "Conf directory should not exist after cleanup", client.pathExists(shardPath + "conf/")); + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobOutputStreamTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobOutputStreamTest.java new file mode 100644 index 000000000000..dbfcb9a9ca5d --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobOutputStreamTest.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import org.junit.Test; + +public class AzureBlobOutputStreamTest extends AbstractAzureBlobClientTest { + + @Test + public void testBasicOutputStream() throws Exception { + String path = "output-stream-test.txt"; + String content = "Output stream test content"; + + try (OutputStream output = client.pushStream(path)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + @Test + public void testOutputStreamWriteByte() throws Exception { + String path = "output-stream-byte-test.txt"; + String content = "Byte by byte write test"; + + try (OutputStream output = client.pushStream(path)) { + for (byte b : content.getBytes(StandardCharsets.UTF_8)) { + output.write(b); + } + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + @Test + public void testOutputStreamWriteByteArray() throws Exception { + String path = "output-stream-array-test.txt"; + String content = "Byte array write test"; + byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8); + + try (OutputStream output = client.pushStream(path)) { + output.write(contentBytes); + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] 
buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + @Test + public void testOutputStreamWriteByteArrayWithOffset() throws Exception { + String path = "output-stream-offset-test.txt"; + String fullContent = "Full content for offset test"; + String partialContent = "offset test"; // Last part + byte[] fullBytes = fullContent.getBytes(StandardCharsets.UTF_8); + int offset = fullContent.indexOf(partialContent); + + try (OutputStream output = client.pushStream(path)) { + output.write(fullBytes, offset, partialContent.length()); + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match", partialContent, readContent); + } + } + + @Test + public void testOutputStreamFlush() throws Exception { + String path = "output-stream-flush-test.txt"; + String content = "Flush test content"; + + try (OutputStream output = client.pushStream(path)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + output.flush(); + assertTrue("File should exist after flush", client.pathExists(path)); + } + } + + @Test + public void testOutputStreamClose() throws Exception { + String path = "output-stream-close-test.txt"; + String content = "Close test content"; + + OutputStream output = client.pushStream(path); + output.write(content.getBytes(StandardCharsets.UTF_8)); + output.close(); + + assertTrue("File should exist after close", client.pathExists(path)); + + OutputStream closedOutput = output; + expectThrows(IOException.class, () -> closedOutput.write(1)); + expectThrows(IOException.class, () -> closedOutput.flush()); + } + + @Test + public void 
testOutputStreamMultipleClose() throws Exception { + String path = "output-stream-multiple-close-test.txt"; + String content = "Multiple close test content"; + + OutputStream output = client.pushStream(path); + output.write(content.getBytes(StandardCharsets.UTF_8)); + output.close(); + output.close(); + + assertTrue("File should exist", client.pathExists(path)); + } + + @Test + public void testOutputStreamLargeData() throws Exception { + String path = "output-stream-large-test.txt"; + StringBuilder contentBuilder = new StringBuilder(); + + for (int i = 0; i < 20000; i++) { + contentBuilder.append("This is line ").append(i).append(" of the large file.\n"); + } + String content = contentBuilder.toString(); + + try (OutputStream output = client.pushStream(path)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + } + + assertTrue("Large file should exist", client.pathExists(path)); + assertEquals("File length should match", content.length(), client.length(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[8192]; + StringBuilder readContentBuilder = new StringBuilder(); + int bytesRead; + while ((bytesRead = input.read(buffer)) != -1) { + readContentBuilder.append(new String(buffer, 0, bytesRead, StandardCharsets.UTF_8)); + } + assertEquals("Large content should match", content, readContentBuilder.toString()); + } + } + + @Test + public void testOutputStreamChunkedWrite() throws Exception { + String path = "output-stream-chunked-test.txt"; + String content = "Chunked write test content"; + byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8); + + try (OutputStream output = client.pushStream(path)) { + int chunkSize = 5; + for (int i = 0; i < contentBytes.length; i += chunkSize) { + int remaining = Math.min(chunkSize, contentBytes.length - i); + output.write(contentBytes, i, remaining); + } + } + + assertTrue("File should exist", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { 
+ byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Chunked content should match", content, readContent); + } + } + + @Test + public void testOutputStreamBinaryData() throws Exception { + String path = "output-stream-binary-test.bin"; + byte[] binaryData = new byte[1024]; + + for (int i = 0; i < binaryData.length; i++) { + binaryData[i] = (byte) (i % 256); + } + + try (OutputStream output = client.pushStream(path)) { + output.write(binaryData); + } + + assertTrue("Binary file should exist", client.pathExists(path)); + assertEquals("Binary file length should match", binaryData.length, client.length(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] readData = new byte[binaryData.length]; + int bytesRead = input.read(readData); + assertEquals("Should read all bytes", binaryData.length, bytesRead); + + for (int i = 0; i < binaryData.length; i++) { + assertEquals("Binary data should match at position " + i, binaryData[i], readData[i]); + } + } + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobPathsTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobPathsTest.java new file mode 100644 index 000000000000..2991340f868a --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobPathsTest.java @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.azureblob; + +import org.junit.Test; + +public class AzureBlobPathsTest extends AbstractAzureBlobClientTest { + + @Test + public void testPathExists() throws Exception { + String path = "path-exists-test-" + java.util.UUID.randomUUID() + ".txt"; + + assertFalse("Path should not exist initially", client.pathExists(path)); + + pushContent(path, "test content"); + + assertTrue("Path should exist after creation", client.pathExists(path)); + } + + @Test + public void testDirectoryExists() throws Exception { + String dirPath = "test-directory-" + java.util.UUID.randomUUID() + "/"; + + assertFalse("Directory should not exist initially", client.pathExists(dirPath)); + + client.createDirectory(dirPath); + + assertTrue("Directory should exist after creation", client.pathExists(dirPath)); + } + + @Test + public void testIsDirectory() throws Exception { + String dirPath = "is-directory-test/"; + String filePath = "is-directory-test.txt"; + + client.createDirectory(dirPath); + assertTrue("Should be a directory", client.isDirectory(dirPath)); + + pushContent(filePath, "test content"); + assertFalse("Should not be a directory", client.isDirectory(filePath)); + } + + @Test + public void testFileLength() throws Exception { + String path = "file-length-test.txt"; + String content = "File length test content"; + + pushContent(path, content); + + assertEquals("File length should match", content.length(), client.length(path)); + } + + @Test + public void testDirectoryLength() throws Exception { + String dirPath = "directory-length-test/"; + + 
client.createDirectory(dirPath); + + expectThrows(AzureBlobException.class, () -> client.length(dirPath)); + } + + @Test + public void testListDirectory() throws Exception { + String dirPath = "list-directory-test/"; + + client.createDirectory(dirPath); + + String[] files = client.listDir(dirPath); + assertEquals("Directory should be empty initially", 0, files.length); + + String[] fileNames = {"file1.txt", "file2.txt", "subdir/"}; + for (String fileName : fileNames) { + String fullPath = dirPath + fileName; + if (fileName.endsWith("/")) { + client.createDirectory(fullPath); + } else { + pushContent(fullPath, "Content of " + fileName); + } + } + + files = client.listDir(dirPath); + assertEquals("Should list all files and directories", fileNames.length, files.length); + + for (String fileName : fileNames) { + boolean found = false; + for (String listedFile : files) { + if (fileName.equals(listedFile)) { + found = true; + break; + } + } + assertTrue("Should find file: " + fileName, found); + } + } + + @Test + public void testListAll() throws Exception { + String dirPath = "list-all-test/"; + + client.createDirectory(dirPath); + client.createDirectory(dirPath + "subdir1/"); + client.createDirectory(dirPath + "subdir2/"); + + pushContent(dirPath + "file1.txt", "Content 1"); + pushContent(dirPath + "file2.txt", "Content 2"); + pushContent(dirPath + "subdir1/file3.txt", "Content 3"); + pushContent(dirPath + "subdir2/file4.txt", "Content 4"); + + java.util.Set allFiles = new java.util.HashSet<>(); + listAllRecursive(dirPath, allFiles); + + assertTrue("Should find file1.txt", allFiles.contains(dirPath + "file1.txt")); + assertTrue("Should find file2.txt", allFiles.contains(dirPath + "file2.txt")); + assertTrue("Should find subdir1/file3.txt", allFiles.contains(dirPath + "subdir1/file3.txt")); + assertTrue("Should find subdir2/file4.txt", allFiles.contains(dirPath + "subdir2/file4.txt")); + } + + private void listAllRecursive(String dirPath, java.util.Set allFiles) + throws 
AzureBlobException { + String[] files = client.listDir(dirPath); + for (String file : files) { + String fullPath = dirPath + file; + if (file.endsWith("/")) { + // It's a directory + allFiles.add(fullPath); + listAllRecursive(fullPath, allFiles); + } else { + // It's a file + allFiles.add(fullPath); + } + } + } + + @Test + public void testDeleteFile() throws Exception { + String path = "delete-file-test.txt"; + + pushContent(path, "test content"); + assertTrue("File should exist", client.pathExists(path)); + + client.delete(java.util.Set.of(path)); + + assertFalse("File should not exist after deletion", client.pathExists(path)); + } + + @Test + public void testDeleteDirectory() throws Exception { + String dirPath = "delete-directory-test/"; + String filePath = dirPath + "nested-file.txt"; + + client.createDirectory(dirPath); + pushContent(filePath, "nested content"); + + assertTrue("Directory should exist", client.pathExists(dirPath)); + assertTrue("File should exist", client.pathExists(filePath)); + + client.deleteDirectory(dirPath); + + assertFalse("Directory should not exist after deletion", client.pathExists(dirPath)); + assertFalse("File should not exist after deletion", client.pathExists(filePath)); + } + + @Test + public void testDeleteNonExistentFile() throws Exception { + String path = "non-existent-file.txt"; + + assertFalse("File should not exist", client.pathExists(path)); + + client.delete(java.util.Set.of(path)); + } + + @Test + public void testDeleteNonExistentDirectory() throws Exception { + String dirPath = "non-existent-directory/"; + + assertFalse("Directory should not exist", client.pathExists(dirPath)); + + client.deleteDirectory(dirPath); + } + + @Test + public void testNestedDirectories() throws Exception { + String rootDir = "nested-test/"; + String subDir1 = rootDir + "subdir1/"; + String subDir2 = rootDir + "subdir2/"; + String deepDir = subDir1 + "deepdir/"; + + client.createDirectory(rootDir); + client.createDirectory(subDir1); + 
client.createDirectory(subDir2); + client.createDirectory(deepDir); + + assertTrue("Root directory should exist", client.pathExists(rootDir)); + assertTrue("Sub directory 1 should exist", client.pathExists(subDir1)); + assertTrue("Sub directory 2 should exist", client.pathExists(subDir2)); + assertTrue("Deep directory should exist", client.pathExists(deepDir)); + + pushContent(rootDir + "root-file.txt", "Root file content"); + pushContent(subDir1 + "sub-file.txt", "Sub file content"); + pushContent(deepDir + "deep-file.txt", "Deep file content"); + + assertTrue("Root file should exist", client.pathExists(rootDir + "root-file.txt")); + assertTrue("Sub file should exist", client.pathExists(subDir1 + "sub-file.txt")); + assertTrue("Deep file should exist", client.pathExists(deepDir + "deep-file.txt")); + } + + @Test + public void testPathSanitization() throws Exception { + String[] testPaths = { + "simple-file.txt", + "/leading-slash.txt", + "trailing-slash/", + "/both-slashes/", + "nested/path/file.txt", + "//double-slash.txt", + " spaced-file.txt ", + "special-chars!@#$%^&*().txt" + }; + + for (String testPath : testPaths) { + String sanitizedPath = client.sanitizedPath(testPath); + assertNotNull("Sanitized path should not be null", sanitizedPath); + assertFalse("Sanitized path should not start with slash", sanitizedPath.startsWith("/")); + } + } + + @Test + public void testFilePathSanitization() throws Exception { + String[] validFilePaths = { + "simple-file.txt", "nested/path/file.txt", "file-with-dashes.txt", "file_with_underscores.txt" + }; + + for (String filePath : validFilePaths) { + String sanitizedPath = client.sanitizedFilePath(filePath); + assertNotNull("Sanitized file path should not be null", sanitizedPath); + assertFalse("Sanitized file path should not end with slash", sanitizedPath.endsWith("/")); + } + + String[] invalidFilePaths = {"file-with-trailing-slash/", "", " "}; + + for (String filePath : invalidFilePaths) { + final String path = filePath; + 
expectThrows(AzureBlobException.class, () -> client.sanitizedFilePath(path)); + } + } + + @Test + public void testDirectoryPathSanitization() throws Exception { + String[] testDirPaths = { + "simple-dir", "nested/path/dir", "dir-with-dashes", "dir_with_underscores" + }; + + for (String dirPath : testDirPaths) { + String sanitizedPath = client.sanitizedDirPath(dirPath); + assertNotNull("Sanitized directory path should not be null", sanitizedPath); + assertTrue("Sanitized directory path should end with slash", sanitizedPath.endsWith("/")); + } + } +} diff --git a/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobReadWriteTest.java b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobReadWriteTest.java new file mode 100644 index 000000000000..33f0a2177855 --- /dev/null +++ b/solr/modules/azure-blob-repository/src/test/org/apache/solr/azureblob/AzureBlobReadWriteTest.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.azureblob; + +import com.carrotsearch.randomizedtesting.generators.RandomBytes; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import org.junit.Test; + +public class AzureBlobReadWriteTest extends AbstractAzureBlobClientTest { + + @Test + public void testBasicReadWrite() throws Exception { + String path = "test-file.txt"; + String content = "Hello, Azure Blob Storage!"; + + pushContent(path, content); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match", content, readContent); + } + } + + @Test + public void testLargeFileReadWrite() throws Exception { + String path = "large-file.txt"; + StringBuilder contentBuilder = new StringBuilder(); + + for (int i = 0; i < 10000; i++) { + contentBuilder.append("This is line ").append(i).append(" of the large file.\n"); + } + String content = contentBuilder.toString(); + + pushContent(path, content); + + assertTrue("File should exist", client.pathExists(path)); + assertEquals("File length should match", content.length(), client.length(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[8192]; + StringBuilder readContentBuilder = new StringBuilder(); + int bytesRead; + while ((bytesRead = input.read(buffer)) != -1) { + readContentBuilder.append(new String(buffer, 0, bytesRead, StandardCharsets.UTF_8)); + } + assertEquals("Content should match", content, readContentBuilder.toString()); + } + } + + @Test + public void testBinaryDataReadWrite() throws Exception { + String path = "binary-file.bin"; + byte[] binaryData = new byte[1024]; + + for (int i = 0; i < binaryData.length; i++) { + binaryData[i] = (byte) (i % 256); + } + + pushContent(path, binaryData); + + try (InputStream input = client.pullStream(path)) { + 
byte[] readData = new byte[binaryData.length]; + int bytesRead = input.read(readData); + assertEquals("Should read all bytes", binaryData.length, bytesRead); + + for (int i = 0; i < binaryData.length; i++) { + assertEquals("Binary data should match at position " + i, binaryData[i], readData[i]); + } + } + } + + @Test + public void testConcurrentReadWrite() throws Exception { + String path = "concurrent-file.txt"; + String content = "Concurrent read/write test content"; + + pushContent(path, content); + + try (InputStream input1 = client.pullStream(path); + InputStream input2 = client.pullStream(path)) { + + byte[] buffer1 = new byte[1024]; + byte[] buffer2 = new byte[1024]; + + int bytesRead1 = input1.read(buffer1); + int bytesRead2 = input2.read(buffer2); + + String readContent1 = new String(buffer1, 0, bytesRead1, StandardCharsets.UTF_8); + String readContent2 = new String(buffer2, 0, bytesRead2, StandardCharsets.UTF_8); + + assertEquals("Both reads should get same content", readContent1, readContent2); + assertEquals("Content should match original", content, readContent1); + } + } + + @Test + public void testStreamClose() throws Exception { + String path = "stream-close-test.txt"; + String content = "Stream close test content"; + + pushContent(path, content); + + InputStream input = client.pullStream(path); + input.close(); + input.close(); + + int firstByte = input.read(); + assertTrue( + "Stream should be resumable after close (got byte: " + firstByte + ")", + firstByte >= 0 || firstByte == -1); + + input.close(); + } + + @Test + public void testEmptyFileReadWrite() throws Exception { + String path = "empty-file.txt"; + String content = ""; + + pushContent(path, content); + + assertTrue("Empty file should exist", client.pathExists(path)); + assertEquals("Empty file should have zero length", 0, client.length(path)); + + try (InputStream input = client.pullStream(path)) { + int bytesRead = input.read(); + assertEquals("Should return -1 for empty file", -1, 
bytesRead); + } + } + + @Test + public void testUnicodeContentReadWrite() throws Exception { + String path = "unicode-file.txt"; + String content = "Hello 世界! 🌍 Unicode test: αβγδε"; + + pushContent(path, content); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Unicode content should match", content, readContent); + } + } + + @Test + public void testOutputStreamFlush() throws Exception { + String path = "flush-test.txt"; + String content = "Flush test content"; + + try (OutputStream output = client.pushStream(path)) { + output.write(content.getBytes(StandardCharsets.UTF_8)); + output.flush(); + } + + assertTrue("File should exist after flush", client.pathExists(path)); + + try (InputStream input = client.pullStream(path)) { + byte[] buffer = new byte[1024]; + int bytesRead = input.read(buffer); + String readContent = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + assertEquals("Content should match after flush", content, readContent); + } + } + + @Test + public void testReadWithConnectionLoss() throws Exception { + String key = "flush-very-large"; + + int numBytes = 2_000_000; + pushContent(key, RandomBytes.randomBytesOfLength(random(), numBytes)); + + int numExceptions = 5; + int bytesPerException = numBytes / numExceptions; + + int maxBuffer = 100; + byte[] buffer = new byte[maxBuffer]; + boolean done = false; + try (InputStream input = client.pullStream(key)) { + long byteCount = 0; + long lastResetBucket = -1; + while (!done) { + int numBytesToRead = random().nextInt(maxBuffer) + 1; + switch (random().nextInt(3)) { + case 0: + { + for (int i = 0; i < numBytesToRead && !done; i++) { + done = input.read() == -1; + if (!done) { + byteCount++; + } + } + } + break; + case 1: + { + int readLen = input.read(buffer, 0, numBytesToRead); + if (readLen > 0) { + byteCount += readLen; + 
} else { + done = true; + } + } + break; + case 2: + { + long bytesSkipped = input.skip(numBytesToRead); + byteCount += bytesSkipped; + if (bytesSkipped < numBytesToRead) { + done = true; + } + } + break; + } + + // Initiate a connection loss at the beginning of every "bytesPerException" cycle. + // The input stream will not immediately see an error, it will have pre-loaded some data. + long currentBucket = byteCount / bytesPerException; + if (currentBucket != lastResetBucket && (byteCount % bytesPerException <= maxBuffer)) { + initiateBlobConnectionLoss(); + lastResetBucket = currentBucket; + } + } + + assertEquals("Wrong amount of data found from InputStream", numBytes, byteCount); + } + } +} diff --git a/solr/server/etc/security.policy b/solr/server/etc/security.policy index f932cc0b461c..25ce5a337ff1 100644 --- a/solr/server/etc/security.policy +++ b/solr/server/etc/security.policy @@ -221,6 +221,9 @@ grant { }; // Permissions for OTEL Runtime Java 17 telemetry and metrics +// Also needed for Reactor (used by Azure SDK with OkHttp) grant { permission jdk.jfr.FlightRecorderPermission "accessFlightRecorder"; + permission jdk.jfr.FlightRecorderPermission "registerEvent"; + permission java.lang.RuntimePermission "accessClassInPackage.jdk.jfr.internal.event"; }; diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc index 12297153e9c2..52d85202e955 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/backup-restore.adoc @@ -383,7 +383,7 @@ If the status is anything other than "success", an error message will explain wh Solr provides a repository abstraction to allow users to backup and restore their data to a variety of different storage systems. 
For example, a Solr cluster running on a local filesystem (e.g., EXT3) can store backup data on the same disk, on a remote network-mounted drive, or in some popular "cloud storage" providers, depending on the 'repository' implementation chosen. -Solr offers multiple different repository implementations out of the box (`LocalFileSystemRepository`, `GCSBackupRepository` and `S3BackupRepository`), and allows users to create plugins for their own storage systems as needed. It is also possible to create a `DelegatingBackupRepository` that delegates to another `BackupRepository` and adds or modifies some behavior on top of it. +Solr offers multiple different repository implementations out of the box (`LocalFileSystemRepository`, `GCSBackupRepository`, `S3BackupRepository`, and `AzureBlobBackupRepository`), and allows users to create plugins for their own storage systems as needed. It is also possible to create a `DelegatingBackupRepository` that delegates to another `BackupRepository` and adds or modifies some behavior on top of it. Users can define any number of repositories in their `solr.xml` file. The backup and restore APIs described above allow users to select which of these definitions they want to use at runtime via the `repository` parameter. @@ -794,3 +794,116 @@ https://docs.aws.amazon.com/sdkref/latest/guide/settings-global.html[These optio * Retries ** RetryMode (`LEGACY`, `STANDARD`, `ADAPTIVE`) ** Max Attempts + +=== AzureBlobBackupRepository + +Stores and retrieves backup files in a Microsoft Azure Blob Storage container. + +This is provided via the `azure-blob-repository` xref:configuration-guide:solr-modules.adoc[Solr Module] that needs to be enabled before use. + +This plugin supports multiple authentication methods: connection strings, account keys, SAS tokens, and Azure Identity (Managed Identity, Service Principal, Azure CLI). +For Azure Identity, ensure the identity has the "Storage Blob Data Contributor" role on the storage account. 
+
+[source,xml]
+----
+<backup>
+  <repository name="azure_blob" class="org.apache.solr.azureblob.AzureBlobBackupRepository">
+    <str name="azure.blob.container.name">solr-backup</str>
+    <str name="azure.blob.connection.string">DefaultEndpointsProtocol=https;AccountName=myaccount;AccountKey=mykey;EndpointSuffix=core.windows.net</str>
+  </repository>
+</backup>
+----
+
+AzureBlobBackupRepository accepts the following options for configuration:
+
+`azure.blob.container.name`::
++
+[%autowidth,frame=none]
+|===
+|Required |Default: none
+|===
++
+The name of the Azure Blob Storage container. The container must exist before performing backup operations.
+
+`azure.blob.connection.string`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+Complete Azure Storage connection string. Mutually exclusive with other authentication methods.
+
+`azure.blob.account.name`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+Azure Storage account name. Used with account key or SAS token authentication.
+
+`azure.blob.account.key`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+Azure Storage account access key. Mutually exclusive with SAS token and Azure Identity.
+
+`azure.blob.sas.token`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+SAS token for time-limited access. Must include `srt=sco` and `sp=rwdlac` permissions.
+The `&` characters must be XML-escaped as `&amp;`.
+
+`azure.blob.endpoint`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+Azure Blob Storage endpoint URL (e.g., `https://myaccount.blob.core.windows.net`).
+Required for Azure Identity authentication.
+
+`azure.blob.tenant.id`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+Azure AD tenant ID for Service Principal authentication.
+
+`azure.blob.client.id`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+Azure AD application (client) ID for Service Principal authentication.
+
+`azure.blob.client.secret`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
++
+Azure AD application secret for Service Principal authentication. 
+ +`location`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Default path prefix within the container for backup storage.